Compare commits
692 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2b4062f964 | |||
| 1dba69095d | |||
| c96eb06dc4 | |||
| f6cfff4ac6 | |||
| f1a8e99942 | |||
| da6019820a | |||
| 5044e1cbf1 | |||
| d8b85bfd1c | |||
| 7df6115199 | |||
| b71f80e6ce | |||
| 33bf5f6292 | |||
| d514dd4055 | |||
| 946ef0ea19 | |||
| a345f7b6e5 | |||
| a2ff193050 | |||
| b1d420e75f | |||
| 28299afc21 | |||
| 441ef75d15 | |||
| 48c241840a | |||
| 94016dd1aa | |||
| 5c906d7026 | |||
| cd2cbc73b7 | |||
| 6388aafbd6 | |||
| a24789d738 | |||
| 773cf48c50 | |||
| ad7aad251c | |||
| 9627ee70e5 | |||
| 63c51d8962 | |||
| b62a82e0c3 | |||
| 90a7adcb2e | |||
| 3ce1233ae4 | |||
| 906881c38b | |||
| a0fedfbb1b | |||
| b045e7a2ba | |||
| 76074d9ee6 | |||
| 17687911b7 | |||
| b1e0ef82f6 | |||
| a0556b861f | |||
| ca5febfed1 | |||
| e45df2e81e | |||
| a869a523ee | |||
| 043a118d41 | |||
| e70e49016f | |||
| a6f5f9c484 | |||
| 466f3a11de | |||
| 629d8b843d | |||
| 68162eb18f | |||
| d78c34928f | |||
| 3ebdd26449 | |||
| 395dbcc873 | |||
| aa88dcc57b | |||
| f27fcb6a82 | |||
| 477e4a2fe6 | |||
| e598e18529 | |||
| 39f451f5ad | |||
| 985133852a | |||
| fab3ad9777 | |||
| a49670c21b | |||
| 3f97297413 | |||
| d2c6eceed9 | |||
| 8a1a42d098 | |||
| b28ab4fc3f | |||
| 6d302b340e | |||
| eda326df16 | |||
| f0b95cc93d | |||
| 2d4eaed111 | |||
| 735349c679 | |||
| c4b287ba53 | |||
| 0d41e94ca9 | |||
| ee8edd4169 | |||
| 3188e63b05 | |||
| 3082fa0829 | |||
| 1efed67056 | |||
| 56b4795115 | |||
| f0d278412f | |||
| 0b9cbc8b23 | |||
| 50ab0a85a7 | |||
| 0d945d1541 | |||
| f97d022149 | |||
| 05cdcac362 | |||
| 74e4f5f97a | |||
| a321874ab4 | |||
| a11234dd68 | |||
| a860a1098f | |||
| 1c42d8ff53 | |||
| 92a08c633f | |||
| 9a0a4c5831 | |||
| 1fc8733a69 | |||
| 587ef55f2c | |||
| 144ba71a33 | |||
| 391e3fff56 | |||
| 39560c948d | |||
| ca8e68822d | |||
| f13b349b9a | |||
| bb2b129549 | |||
| 5bd75c73ed | |||
| 79902a0278 | |||
| 15be493055 | |||
| 5f8e59b0f1 | |||
| 1b1037171b | |||
| de0ac21fff | |||
| 398efdb0fa | |||
| 80c579a9dd | |||
| 3beef57825 | |||
| 7cc00087e7 | |||
| 0df80f4391 | |||
| 8fa5a03752 | |||
| b1476c76f6 | |||
| 794f48766c | |||
| acca3ec3af | |||
| af312ccc97 | |||
| 7b05ccddc7 | |||
| 84ec27616a | |||
| 9022804d78 | |||
| 20a4f79ed1 | |||
| 2b500ed68a | |||
| e4723f671a | |||
| b6e4e40df4 | |||
| 91f339b981 | |||
| 72c33dfe95 | |||
| f67063ba81 | |||
| ec7f2f249e | |||
| 00d25595c1 | |||
| ee502e5640 | |||
| 0dc677f071 | |||
| c28c2a2380 | |||
| d5357f816d | |||
| 41545f7ec5 | |||
| 0664bf961a | |||
| 58f93fb7d3 | |||
| 2d5f20684a | |||
| c85a25faaa | |||
| 27a8ba42ed | |||
| ce9888b52a | |||
| a6289927d3 | |||
| 3b750715a3 | |||
| 0397be5939 | |||
| 87b113c2e3 | |||
| 60235dba5e | |||
| cc2c820975 | |||
| e8e9147377 | |||
| dbe9b15fa1 | |||
| f8ba265340 | |||
| 8c0f254c06 | |||
| 244bacd0dc | |||
| 4553e32bc4 | |||
| a877c3f6d9 | |||
| 77a102b7de | |||
| 526742199b | |||
| 12135b4c8a | |||
| 0120d8f31e | |||
| d9f0875591 | |||
| 247c9d468c | |||
| 935cf2fcca | |||
| 6430d67569 | |||
| 269be4ec84 | |||
| d8097d587f | |||
| c62d8c9b74 | |||
| dad62c4c47 | |||
| 45949e944a | |||
| e4e0090b54 | |||
| 5795b3be4e | |||
| e805380b82 | |||
| ecc909de38 | |||
| db84c1535d | |||
| 8e18d10318 | |||
| b014a3d315 | |||
| 969bfff449 | |||
| de9238d37e | |||
| 7de3c86c5a | |||
| b7bd177105 | |||
| 7530ce04e0 | |||
| 02147cc850 | |||
| 8ebb81fd76 | |||
| c46bc92949 | |||
| fb311952d7 | |||
| 285c208cf7 | |||
| 3b16c590e0 | |||
| 349d0da07e | |||
| 4e6f51167d | |||
| 37b5731694 | |||
| f6677748a0 | |||
| f844e516d8 | |||
| 19eebf6e0d | |||
| 96514de472 | |||
| c7fc5af122 | |||
| 80b386a472 | |||
| 314361733f | |||
| 7f735b4db2 | |||
| 34c6f93496 | |||
| c1a2710a32 | |||
| 9e893d16d1 | |||
| 44cf33449d | |||
| 005b2f4c5d | |||
| f15b0fbb4f | |||
| b10e38e392 | |||
| fe8560fc12 | |||
| 436672de0e | |||
| 4f76166cf0 | |||
| 0a7cc85eab | |||
| 046c293183 | |||
| 8f4c0bf088 | |||
| 83a07f4759 | |||
| 9e0ef2a1bc | |||
| efe1cb00c8 | |||
| 4577f392f9 | |||
| 6b76ea4707 | |||
| 354502ee48 | |||
| cca8587d35 | |||
| 4d0f59fa5a | |||
| 68c1a08ad1 | |||
| 5168226d60 | |||
| b93643c8fe | |||
| 2eef395e1c | |||
| c725d7d648 | |||
| 660ce7c54b | |||
| 1a03e3b1c6 | |||
| f6b68f0f50 | |||
| aacf36e943 | |||
| fe8dc26bc9 | |||
| 4a3e3e20e5 | |||
| f8a6db68ca | |||
| b22b3f506a | |||
| d472d697cd | |||
| 8c82d0664d | |||
| 2a285d5ec2 | |||
| 28f4d6db63 | |||
| 542e06c789 | |||
| fc4aa66ee4 | |||
| f25d3ec917 | |||
| ca5595fe7b | |||
| 91ce8fc000 | |||
| 8ad5e98f8d | |||
| 2785355750 | |||
| c3112adac5 | |||
| 13a7cbcd64 | |||
| 601e5f1d57 | |||
| 2333b7a7ec | |||
| 3f023450dd | |||
| 69aeba0df7 | |||
| 10f89d7b72 | |||
| 93869b48ab | |||
| ef94aa201f | |||
| c77a6e3faa | |||
| 1d938832a7 | |||
| f7918c9349 | |||
| a1bed18194 | |||
| d12f59aa53 | |||
| b816fd4e26 | |||
| b632290166 | |||
| 20428f5e60 | |||
| 109c3e468c | |||
| 9fa3a093f2 | |||
| 9644b8ae67 | |||
| b8fb9270c4 | |||
| 56a78e74b2 | |||
| 429b8eceb4 | |||
| e493b1c482 | |||
| 20859cc408 | |||
| 50aabb9eb2 | |||
| 8fabef9d35 | |||
| 81cd678291 | |||
| 60b143e9df | |||
| 645a2f482d | |||
| a919269eb5 | |||
| 9cda237bb1 | |||
| eadf34633e | |||
| c050ee6573 | |||
| fbc477df71 | |||
| 64ad7dec0d | |||
| 9e2628ee7c | |||
| 1c7f47a58c | |||
| 6875471916 | |||
| 75bce317a3 | |||
| fd9c32c0f2 | |||
| 20edca75e9 | |||
| 103f51ad34 | |||
| 8ab9f61dcf | |||
| d90f73bcec | |||
| a21f364ad7 | |||
| 1c7c7c3c5f | |||
| 2bc82bb504 | |||
| 3db6b9cc87 | |||
| d35efb9898 | |||
| 1381c89e56 | |||
| 1a9542cf75 | |||
| a7683d04a9 | |||
| 25065283b3 | |||
| d6615d8ec7 | |||
| 0ce1b9fe20 | |||
| d9c090fe36 | |||
| 54e78cadb2 | |||
| 38adfebe78 | |||
| cfd86dcdb8 | |||
| d89e7a3cd4 | |||
| a7417f8a4a | |||
| eeb05cf556 | |||
| 74c1b946e0 | |||
| ce22301dc6 | |||
| 83080772f2 | |||
| 7a8ee8b29d | |||
| 0b5fd40a01 | |||
| 42d72b5922 | |||
| 5d6431c114 | |||
| 0e9416036a | |||
| 0cc63043e0 | |||
| 6b4ccb9b14 | |||
| b46b0c9888 | |||
| ef8c213e88 | |||
| 52882dade6 | |||
| 0443484115 | |||
| 6cf7a9e330 | |||
| b7bbc62503 | |||
| d29f90e89d | |||
| 026a5e47df | |||
| 3fb35520c6 | |||
| 25b7b0f8e6 | |||
| ff3d2773e2 | |||
| fdf9343c51 | |||
| 6f864f8f94 | |||
| a175f39577 | |||
| 69fc6d9c1e | |||
| d3b22b76d8 | |||
| 1bd5ac7f2f | |||
| a79b0ec461 | |||
| 3ccf723bf9 | |||
| 8c8f95bc8e | |||
| c5789f4309 | |||
| 297eaa3533 | |||
| b2b479b40e | |||
| a8b689f0c2 | |||
| 6b3efcee49 | |||
| 652f8e6f3e | |||
| edf9c75621 | |||
| ae40fca955 | |||
| a31477dabb | |||
| 60c4bc96fd | |||
| da8654bb41 | |||
| 239ea1bdea | |||
| 75b4a34670 | |||
| 5ec6baa400 | |||
| 135b4c8b35 | |||
| 0d563621fb | |||
| d1d2d43387 | |||
| 844d4a32ce | |||
| 110387d149 | |||
| af6f9bc2a1 | |||
| 33f554d83c | |||
| a219a0a4df | |||
| 412f2389f1 | |||
| e50809b771 | |||
| 5b6d413476 | |||
| f720751d79 | |||
| 055fde40e0 | |||
| e69d11d30c | |||
| 46072425fe | |||
| c8ecb56f27 | |||
| e3461e0b2a | |||
| cba86b7303 | |||
| 6b88f46c54 | |||
| a45bd28598 | |||
| d2ea959fe9 | |||
| d17eff29d5 | |||
| 2d3d1d9736 | |||
| 145a38a875 | |||
| 0896944382 | |||
| 9c64d09610 | |||
| 64b39d835e | |||
| 20a06c586f | |||
| 06a6d6967a | |||
| 986ec04048 | |||
| 0628004709 | |||
| c659a16899 | |||
| 08b8465ca9 | |||
| 51dc98d314 | |||
| 0df7e61d2c | |||
| 52c539d53a | |||
| 3c070f9f9d | |||
| bff484a51b | |||
| 2a52e28568 | |||
| 7d36533aeb | |||
| 99faac212e | |||
| 6da970f15d | |||
| 4e2b20b705 | |||
| ba8337464d | |||
| f6aa1965d7 | |||
| ad4542bf6d | |||
| 54cd633366 | |||
| e2248045f5 | |||
| d7663c7808 | |||
| f236cbfec3 | |||
| dc63ad0ad2 | |||
| 83bbe9b458 | |||
| e2211b2683 | |||
| 3e1559b910 | |||
| baf834cc0f | |||
| abcaf05229 | |||
| 21c7c9f0ca | |||
| cac4f2c0e6 | |||
| deb59eab72 | |||
| 9faaa292b4 | |||
| cb33c73418 | |||
| 8a364df2c8 | |||
| aede94e757 | |||
| 2c7d7a9b2f | |||
| cdde0c8411 | |||
| 45fd45103d | |||
| c653f5dc3f | |||
| 8bdec80882 | |||
| d8be50d772 | |||
| 06031229e8 | |||
| 9c93fc5775 | |||
| 74c997d985 | |||
| c857592558 | |||
| e8cdcf5328 | |||
| 8a4fe80f8d | |||
| e89376d66f | |||
| 81ce945450 | |||
| df88375f0d | |||
| ccb5d87076 | |||
| a1cb811cb8 | |||
| 314fe9f827 | |||
| 645b99aadd | |||
| 78b635ee3c | |||
| 91ea3ae4b2 | |||
| 3072e5543b | |||
| c90f25dd1f | |||
| 744079ffe6 | |||
| c0300575c1 | |||
| 1964b0565b | |||
| 8163d37192 | |||
| a11aed1acc | |||
| 434d70d8bc | |||
| 5671059f62 | |||
| 95f395027f | |||
| 2f2998bb1b | |||
| 363cc93674 | |||
| 808fee151d | |||
| 74636f9c4a | |||
| 222767e5e8 | |||
| 6fda92aa7f | |||
| 1bd975c0ba | |||
| b58db237e4 | |||
| 6713274a42 | |||
| 2d7543c61f | |||
| 2ababfe6ed | |||
| 3c42024539 | |||
| 3792b77bd1 | |||
| 86e64c1d3b | |||
| 408dd8aa28 | |||
| 5bd937533c | |||
| 6c4aca7adc | |||
| a5cae16496 | |||
| 65bebb9b80 | |||
| dfdd7b6e6f | |||
| 4a2f822137 | |||
| 2658494e81 | |||
| f5bd77b3e1 | |||
| 7e780f4832 | |||
| 167b5648ea | |||
| 9eaddfafa3 | |||
| b8ae8cc801 | |||
| c9a3f36f56 | |||
| 0dd8e3f8d8 | |||
| 511add7249 | |||
| e97a9993b9 | |||
| 279b656adc | |||
| e527240b27 | |||
| 6b4fb9f878 | |||
| 69dd0f7cf1 | |||
| 3c59566cc5 | |||
| b59bb4e351 | |||
| d87fd9f039 | |||
| 55647a5813 | |||
| 6f2dab248a | |||
| 1148c46241 | |||
| 7a22c639dc | |||
| 934103476f | |||
| bf3239472f | |||
| f1e0292517 | |||
| 0a97ce6bff | |||
| 6c1322b997 | |||
| c14bf441a3 | |||
| 19ba9e43b6 | |||
| 5d5b8912be | |||
| c4c0e5abc2 | |||
| 457c7b76cd | |||
| 9b5b88b5e0 | |||
| a22465e07a | |||
| 9987f3d824 | |||
| 19854c7cd2 | |||
| eb612f5574 | |||
| b294d1d022 | |||
| 72c8037a24 | |||
| ef9a08a872 | |||
| e26f9b2070 | |||
| 4f37669170 | |||
| d409a4409c | |||
| 5d3be898a8 | |||
| af98122793 | |||
| 73bcd83dba | |||
| 762eb79f1e | |||
| 38dd057e91 | |||
| e444d8f29c | |||
| 13f344c5ce | |||
| 1dce908930 | |||
| 50f9f389ec | |||
| 7696ddc59e | |||
| 5eac6084bc | |||
| e363ced3c3 | |||
| 292d2fb42f | |||
| 0a6865b328 | |||
| 9c626ef8ea | |||
| 2ef1ad280b | |||
| 10297fa23c | |||
| 6ec74aec07 | |||
| 8825e9044c | |||
| 2470434d60 | |||
| 9bf260472b | |||
| 699b3679bc | |||
| 98c98821ff | |||
| c5e3a6fb5b | |||
| e2cea6eeba | |||
| c73594fe41 | |||
| 97acd66b4c | |||
| f98b5d00a4 | |||
| 585d6778da | |||
| f903ceece0 | |||
| d05a87e686 | |||
| a147164d3c | |||
| 5cdc39e29a | |||
| 2b3923ff13 | |||
| a717199bbf | |||
| 8fcc160f6b | |||
| f34d298495 | |||
| 0ab2d752ff | |||
| 7cda0e5224 | |||
| 0b76d23d1a | |||
| f99676e315 | |||
| 77c0bc6b13 | |||
| c5b4c48165 | |||
| 20132435c0 | |||
| 5ad030d19d | |||
| 05c63259b5 | |||
| a01c1f7305 | |||
| 75e1339d4c | |||
| 0159f25fd0 | |||
| b7ad3f478f | |||
| a2a32688ca | |||
| a49f4c617d | |||
| dfe512c58d | |||
| c6eebfc25a | |||
| cf2b2d31ce | |||
| 2af8b8ff37 | |||
| 9cb5baeacf | |||
| 9ca72a69a7 | |||
| 77dd6d5469 | |||
| 1be3b74cfb | |||
| 265bd59c1d | |||
| 7c6c5619a7 | |||
| 50c046331d | |||
| 4caad285a6 | |||
| e2eb561e8e | |||
| bfb704684e | |||
| f0dc919f92 | |||
| 41fa1f1b5c | |||
| fc78e708ed | |||
| ec1443b9f1 | |||
| 78886365c2 | |||
| e27b0b7651 | |||
| 8fa44b1724 | |||
| 0704589ceb | |||
| 58b89965c8 | |||
| c23c7c994b | |||
| 8d7500d80d | |||
| 27ec74c68a | |||
| bb706c3f38 | |||
| a94841eaa0 | |||
| 7ba1a2b3df | |||
| 55366510e5 | |||
| 787b5c5f93 | |||
| ab6c629ccc | |||
| ccfe6a47c3 | |||
| 24130b7e53 | |||
| 158eb32686 | |||
| adaee2c72c | |||
| e21898ea98 | |||
| fa7b0b0a67 | |||
| 82b5786721 | |||
| 73a6b80317 | |||
| ec4cb16a29 | |||
| bea2562fc4 | |||
| b94cb8e2c4 | |||
| fa9fd26acb | |||
| 7abc9ce4df | |||
| a178081468 | |||
| bdb7edd89e | |||
| 5ed27c0f74 | |||
| 531ac20408 | |||
| cc340c4a4d | |||
| 19136dfc07 | |||
| 9a75743496 | |||
| e3624e00db | |||
| 8e58265b60 | |||
| ebe60abc4f | |||
| 33d24095c4 | |||
| 01cc701e54 | |||
| 2b512cbca4 | |||
| 25cbe3e1d6 | |||
| f48ba47d1e | |||
| 226fd79c8e | |||
| 0ddc8aba68 | |||
| 38875d00a7 | |||
| 5089c55e0b | |||
| 142b4bf3ce | |||
| c8e506c383 | |||
| f4c761c6a0 | |||
| 26f7f68507 | |||
| 36fa8a4d28 | |||
| 443950e827 | |||
| 96691268df | |||
| 77fe7ab6b2 | |||
| 84324d06b8 | |||
| 8b7b074df9 | |||
| b194617d00 | |||
| 2997ef9446 | |||
| a83d579d5b | |||
| 9ae1fa9e39 | |||
| b29b709a71 | |||
| f43b126677 | |||
| 1ef9e88549 | |||
| 447a2bba3a | |||
| ca9a61ae38 | |||
| 79cffa9232 | |||
| 2bf73fbe2c | |||
| 7cbe943d2d | |||
| 21cc9c8d32 | |||
| f7dfd4ae36 | |||
| 2110a3a0c4 | |||
| 5f3f456784 | |||
| f4ba97ad9a | |||
| 75483b6db1 | |||
| aab5bcc6ac | |||
| 5ad8281885 | |||
| 1e5a23fa64 | |||
| 67f1198ba9 | |||
| d5e72ae17f | |||
| a5d60f42ee | |||
| 09aba91766 | |||
| f59693c075 | |||
| c997830e1e | |||
| 4a6fac36d8 | |||
| 624057fce6 | |||
| 97d6f25008 | |||
| f61695ee73 | |||
| e2e6b6ff1a | |||
| c73b799de7 | |||
| a52363231f | |||
| 9550d0fd46 | |||
| 7dc85495e0 | |||
| 6549b0f2b7 | |||
| e2a4905606 | |||
| e5dad4ac57 | |||
| 180a7036bc | |||
| 8fed969618 | |||
| ded011c5a5 | |||
| 71b685aee0 | |||
| bbbce92651 | |||
| 80a676658c | |||
| c868425467 | |||
| 59c1a13f45 | |||
| 1d8068d71d | |||
| 9ac4a2e53e | |||
| 6bc5d72271 | |||
| b737af8226 | |||
| 73bf3ab1b2 | |||
| 76edc40ab0 | |||
| b9b9ee3e6c | |||
| 8fbc9d7d78 | |||
| 699a9c11a9 | |||
| d60a9917d3 | |||
| 7c07422202 | |||
| ca7f46beb5 | |||
| cb0e2e2f36 | |||
| 4c0cc77e94 | |||
| 9b62c98170 | |||
| 469e4df3c2 | |||
| ae11a31058 | |||
| 3e200b64fb | |||
| 1745cfc6d7 | |||
| 58c07867e3 | |||
| 4523965de9 |
@@ -9,6 +9,12 @@ node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
|
||||
# Built artifacts that are regenerated inside the image. Excluded so local
|
||||
# rebuilds on the developer's machine don't invalidate the npm-install layer
|
||||
# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
|
||||
ui-tui/dist/
|
||||
ui-tui/packages/hermes-ink/dist/
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
|
||||
@@ -19,3 +25,7 @@ node_modules
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
|
||||
# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
|
||||
hermes-config/
|
||||
runtime/
|
||||
|
||||
@@ -244,6 +244,15 @@ BROWSERBASE_PROXIES=true
|
||||
# Uses custom Chromium build to avoid bot detection altogether
|
||||
BROWSERBASE_ADVANCED_STEALTH=false
|
||||
|
||||
# Browser engine for local mode (default: auto = Chrome)
|
||||
# "auto" — use Chrome (don't pass --engine flag)
|
||||
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||
# "chrome" — explicitly request Chrome
|
||||
# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
|
||||
# empty results are automatically retried with Chrome.
|
||||
# Also configurable via browser.engine in config.yaml.
|
||||
# AGENT_BROWSER_ENGINE=auto
|
||||
|
||||
# Browser session timeout in seconds (default: 300)
|
||||
# Sessions are cleaned up after this duration of inactivity
|
||||
BROWSER_SESSION_TIMEOUT=300
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
# Dependabot configuration for hermes-agent.
|
||||
#
|
||||
# Deliberately scoped to github-actions only.
|
||||
#
|
||||
# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
|
||||
# because we pin source dependencies exactly (uv.lock, package-lock.json) as
|
||||
# part of our supply-chain posture. Automatic version-bump PRs against those
|
||||
# pins would undermine the strategy — pins are moved deliberately, after
|
||||
# review, not on a schedule.
|
||||
#
|
||||
# github-actions is the exception: action pins (we use full commit SHAs per
|
||||
# supply-chain policy) must be updated when upstream actions publish
|
||||
# patches — usually themselves security fixes. Dependabot opens a PR with
|
||||
# the new SHA and release notes; we review and merge like any other PR.
|
||||
#
|
||||
# Security-update PRs for source dependencies (opened ONLY when a CVE is
|
||||
# published affecting a currently-pinned version) are enabled separately
|
||||
# via the repo's Dependabot security updates setting
|
||||
# (Settings → Code security → Dependabot → Dependabot security updates).
|
||||
# Those are CVE-only, not schedule-driven, and do not conflict with our
|
||||
# pinning strategy — they fire when a pinned version becomes known-bad,
|
||||
# which is exactly when we want to move the pin.
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- "dependencies"
|
||||
- "github-actions"
|
||||
commit-message:
|
||||
prefix: "chore(actions)"
|
||||
include: "scope"
|
||||
groups:
|
||||
# Batch routine action bumps into one PR per week to reduce noise.
|
||||
# Security updates still open individually and bypass grouping.
|
||||
actions-minor-patch:
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
@@ -76,6 +76,16 @@ jobs:
|
||||
run: |
|
||||
mkdir -p _site/docs
|
||||
cp -r website/build/* _site/docs/
|
||||
# llms.txt / llms-full.txt are also published at the site root
|
||||
# (https://hermes-agent.nousresearch.com/llms.txt) because some
|
||||
# agents and IDE plugins probe the classic root-level path rather
|
||||
# than /docs/llms.txt. Same file, two URLs, one source of truth.
|
||||
if [ -f website/build/llms.txt ]; then
|
||||
cp website/build/llms.txt _site/llms.txt
|
||||
fi
|
||||
if [ -f website/build/llms-full.txt ]; then
|
||||
cp website/build/llms-full.txt _site/llms-full.txt
|
||||
fi
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
name: Lint (ruff + ty)
|
||||
|
||||
# Surface ruff and ty diagnostics as a diff vs the target branch.
|
||||
# This check is advisory only ATM it always exits zero and never blocks merge.
|
||||
# It posts a Markdown summary to the workflow run and, for pull requests,
|
||||
# comments the same summary on the PR.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # needed to post/update PR comments
|
||||
|
||||
concurrency:
|
||||
group: lint-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
lint-diff:
|
||||
name: ruff + ty diff
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff + ty
|
||||
run: |
|
||||
uv tool install ruff
|
||||
uv tool install ty
|
||||
|
||||
- name: Determine base ref
|
||||
id: base
|
||||
env:
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
run: |
|
||||
# For PRs, diff against the PR's pinned parent commit
|
||||
# (github.event.pull_request.base.sha — snapshot at PR open time,
|
||||
# so later pushes to main don't leak into the diff).
|
||||
# For pushes to main, diff against the previous commit on main.
|
||||
if [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||
BASE_SHA="${PR_BASE_SHA}"
|
||||
BASE_REF="PR base (${BASE_SHA:0:7})"
|
||||
else
|
||||
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || git rev-parse HEAD)
|
||||
BASE_REF="HEAD~1"
|
||||
fi
|
||||
echo "sha=${BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${BASE_REF}" >> "$GITHUB_OUTPUT"
|
||||
echo "Base SHA: ${BASE_SHA}"
|
||||
echo "Base ref: ${BASE_REF}"
|
||||
|
||||
- name: Run ruff + ty on HEAD
|
||||
run: |
|
||||
mkdir -p .lint-reports/head
|
||||
ruff check --output-format json --exit-zero \
|
||||
> .lint-reports/head/ruff.json || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> .lint-reports/head/ty.json || true
|
||||
echo "HEAD ruff: $(wc -c < .lint-reports/head/ruff.json) bytes"
|
||||
echo "HEAD ty: $(wc -c < .lint-reports/head/ty.json) bytes"
|
||||
|
||||
- name: Run ruff + ty on base (via git worktree)
|
||||
run: |
|
||||
mkdir -p .lint-reports/base
|
||||
# Use a worktree so we don't clobber the main checkout. If the basex
|
||||
# SHA is identical to HEAD (e.g. first commit), skip and leave the
|
||||
# base reports empty — the diff script handles missing files.
|
||||
HEAD_SHA=$(git rev-parse HEAD)
|
||||
BASE_SHA="${{ steps.base.outputs.sha }}"
|
||||
if [ "$BASE_SHA" = "$HEAD_SHA" ]; then
|
||||
echo "Base SHA == HEAD SHA, skipping base scan."
|
||||
echo '[]' > .lint-reports/base/ruff.json
|
||||
echo '[]' > .lint-reports/base/ty.json
|
||||
else
|
||||
git worktree add --detach /tmp/lint-base "$BASE_SHA"
|
||||
(
|
||||
cd /tmp/lint-base
|
||||
ruff check --output-format json --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ruff.json" || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ty.json" || true
|
||||
)
|
||||
git worktree remove --force /tmp/lint-base
|
||||
fi
|
||||
echo "base ruff: $(wc -c < .lint-reports/base/ruff.json) bytes"
|
||||
echo "base ty: $(wc -c < .lint-reports/base/ty.json) bytes"
|
||||
|
||||
- name: Generate diff summary
|
||||
run: |
|
||||
python scripts/lint_diff.py \
|
||||
--base-ruff .lint-reports/base/ruff.json \
|
||||
--head-ruff .lint-reports/head/ruff.json \
|
||||
--base-ty .lint-reports/base/ty.json \
|
||||
--head-ty .lint-reports/head/ty.json \
|
||||
--base-ref "${{ steps.base.outputs.ref }}" \
|
||||
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
|
||||
--output .lint-reports/summary.md
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload reports as artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: lint-reports
|
||||
path: .lint-reports/
|
||||
retention-days: 14
|
||||
# .lint-reports/ is a dotfile-prefixed directory, and upload-artifact@v4
|
||||
# skips hidden files by default (breaking change from v3). Opt back in.
|
||||
include-hidden-files: true
|
||||
|
||||
- name: Post / update PR comment
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
|
||||
const marker = '<!-- lint-diff-summary -->';
|
||||
const fullBody = marker + '\n' + body;
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
});
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: existing.id,
|
||||
body: fullBody,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: fullBody,
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
name: OSV-Scanner
|
||||
|
||||
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
|
||||
# database. Runs on every PR that touches a lockfile and on a weekly schedule
|
||||
# against main.
|
||||
#
|
||||
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
|
||||
# It reports known CVEs in currently-pinned dependency versions so we can
|
||||
# decide when and how to patch on our own schedule. Our pinning strategy
|
||||
# (full SHA / exact version) is preserved; only the notification signal
|
||||
# is added.
|
||||
#
|
||||
# Complements the existing supply-chain-audit.yml workflow (which scans
|
||||
# for malicious code patterns in PR diffs) by covering the orthogonal
|
||||
# "currently-pinned dep became known-vulnerable" case.
|
||||
#
|
||||
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
|
||||
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
|
||||
# fail-on-vuln is disabled so the job does not block merges on pre-existing
|
||||
# vulnerabilities in pinned deps that we may need to patch deliberately.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package.json'
|
||||
- 'website/package-lock.json'
|
||||
- '.github/workflows/osv-scanner.yml'
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package-lock.json'
|
||||
schedule:
|
||||
# Weekly scan against main — catches CVEs published after merge for
|
||||
# deps that haven't changed since.
|
||||
- cron: '0 9 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
# Required by the reusable workflow to upload SARIF to the Security tab.
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan lockfiles
|
||||
uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c51854704019a247608d928f370c98740469d4b5 # v2.3.5
|
||||
with:
|
||||
# Scan explicit lockfiles rather than recursing, so we only look at
|
||||
# the three sources of truth and skip vendored / test / worktree dirs.
|
||||
scan-args: |-
|
||||
--lockfile=uv.lock
|
||||
--lockfile=ui-tui/package-lock.json
|
||||
--lockfile=website/package-lock.json
|
||||
fail-on-vuln: false
|
||||
@@ -37,12 +37,18 @@ hermes-agent/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ │ # yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
|
||||
│ ├── model-providers/ # Inference backend plugins (openrouter, anthropic, gmi, ...)
|
||||
│ ├── kanban/ # Multi-agent board dispatcher + worker plugin
|
||||
│ ├── hermes-achievements/ # Gamified achievement tracking
|
||||
│ ├── observability/ # Metrics / traces / logs plugin
|
||||
│ ├── image_gen/ # Image-generation providers
|
||||
│ └── <others>/ # disk-cleanup, example-dashboard, google_meet, platforms,
|
||||
│ # spotify, strike-freedom-cockpit, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
@@ -53,7 +59,7 @@ hermes-agent/
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
|
||||
└── tests/ # Pytest suite (~17k tests across ~900 files as of May 2026)
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
@@ -257,7 +263,16 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
Requires changes in **2 files**:
|
||||
For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
|
||||
route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
|
||||
`~/.hermes/plugins/<name>/__init__.py`, then register tools with
|
||||
`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
|
||||
enabled or disabled without touching `tools/` or `toolsets.py`.
|
||||
|
||||
Use the built-in route below only when the user is explicitly contributing a new
|
||||
core Hermes tool that should ship in the base system.
|
||||
|
||||
Built-in/core tools require changes in **2 files**:
|
||||
|
||||
**1. Create `tools/your_tool.py`:**
|
||||
```python
|
||||
@@ -280,9 +295,9 @@ registry.register(
|
||||
)
|
||||
```
|
||||
|
||||
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
|
||||
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. **This step is required:** auto-discovery imports the tool and registers its schema, but the tool is only *exposed to an agent* if its name appears in a toolset. `_HERMES_CORE_TOOLS` is not dead code — it's the default bundle every platform's base toolset inherits from.
|
||||
|
||||
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
|
||||
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. Wiring into a toolset is still a deliberate, manual step.
|
||||
|
||||
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
|
||||
|
||||
@@ -304,6 +319,22 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
|
||||
### Top-level `config.yaml` sections (non-exhaustive):
|
||||
|
||||
`model`, `agent`, `terminal`, `compression`, `display`, `stt`, `tts`,
|
||||
`memory`, `security`, `delegation`, `smart_model_routing`, `checkpoints`,
|
||||
`auxiliary`, `curator`, `skills`, `gateway`, `logging`, `cron`, `profiles`,
|
||||
`plugins`, `honcho`.
|
||||
|
||||
`auxiliary` holds per-task overrides for side-LLM work (curator, vision,
|
||||
embedding, title generation, session_search, etc.) — each task can pin
|
||||
its own provider/model/base_url/max_tokens/reasoning_effort. See
|
||||
`agent/auxiliary_client.py::_resolve_auto` for resolution order.
|
||||
|
||||
`curator` holds the background skill-maintenance config —
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup` (nested).
|
||||
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
@@ -482,6 +513,31 @@ generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
### Model-provider plugins (`plugins/model-providers/<name>/`)
|
||||
|
||||
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
|
||||
ships as a plugin here. Each plugin's `__init__.py` calls
|
||||
`providers.register_provider(ProviderProfile(...))` at module load.
|
||||
`providers/__init__.py._discover_providers()` is a **lazy, separate
|
||||
discovery system** — scanned on first `get_provider_profile()` or
|
||||
`list_providers()` call, NOT by the general PluginManager.
|
||||
|
||||
Scan order:
|
||||
1. Bundled: `<repo>/plugins/model-providers/<name>/`
|
||||
2. User: `$HERMES_HOME/plugins/model-providers/<name>/`
|
||||
3. Legacy: `<repo>/providers/<name>.py` (back-compat)
|
||||
|
||||
User plugins of the same name override bundled ones — `register_provider()`
|
||||
is last-writer-wins. This lets third parties swap out any built-in
|
||||
profile without a repo patch.
|
||||
|
||||
The general PluginManager records `kind: model-provider` manifests but does
|
||||
NOT import them (would double-instantiate `ProviderProfile`). Plugins
|
||||
without an explicit `kind:` get auto-coerced via a source-text heuristic
|
||||
(`register_provider` + `ProviderProfile` in `__init__.py`).
|
||||
|
||||
Full authoring guide: `website/docs/developer-guide/model-provider-plugin.md`.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
|
||||
@@ -510,11 +566,176 @@ niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `platforms`
|
||||
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
Standard fields: `name`, `description`, `version`, `author`, `license`,
|
||||
`platforms` (OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.config` (config.yaml settings the skill needs — stored
|
||||
under `skills.config.<key>`, prompted during setup, injected at load time).
|
||||
`metadata.hermes.related_skills`, `metadata.hermes.config` (config.yaml
|
||||
settings the skill needs — stored under `skills.config.<key>`, prompted
|
||||
during setup, injected at load time).
|
||||
|
||||
Top-level `tags:` and `category:` are also accepted and mirrored from
|
||||
`metadata.hermes.*` by the loader.
|
||||
|
||||
---
|
||||
|
||||
## Toolsets
|
||||
|
||||
All toolsets are defined in `toolsets.py` as a single `TOOLSETS` dict.
|
||||
Each platform's adapter picks a base toolset (e.g. Telegram uses
|
||||
`"messaging"`); `_HERMES_CORE_TOOLS` is the default bundle most
|
||||
platforms inherit from.
|
||||
|
||||
Current toolset keys: `browser`, `clarify`, `code_execution`, `cronjob`,
|
||||
`debugging`, `delegation`, `discord`, `discord_admin`, `feishu_doc`,
|
||||
`feishu_drive`, `file`, `homeassistant`, `image_gen`, `kanban`, `memory`,
|
||||
`messaging`, `moa`, `rl`, `safe`, `search`, `session_search`, `skills`,
|
||||
`spotify`, `terminal`, `todo`, `tts`, `video`, `vision`, `web`, `yuanbao`.
|
||||
|
||||
Enable/disable per platform via `hermes tools` (the curses UI) or the
|
||||
`tools.<platform>.enabled` / `tools.<platform>.disabled` lists in
|
||||
`config.yaml`.
|
||||
|
||||
---
|
||||
|
||||
## Delegation (`delegate_task`)
|
||||
|
||||
`tools/delegate_tool.py` spawns a subagent with an isolated
|
||||
context + terminal session. Synchronous: the parent waits for the
|
||||
child's summary before continuing its own loop — if the parent is
|
||||
interrupted, the child is cancelled.
|
||||
|
||||
Two shapes:
|
||||
|
||||
- **Single:** pass `goal` (+ optional `context`, `toolsets`).
|
||||
- **Batch (parallel):** pass `tasks: [...]` — each gets its own subagent
|
||||
running concurrently. Concurrency is capped by
|
||||
`delegation.max_concurrent_children` (default 3).
|
||||
|
||||
Roles:
|
||||
|
||||
- `role="leaf"` (default) — focused worker. Cannot call `delegate_task`,
|
||||
`clarify`, `memory`, `send_message`, `execute_code`.
|
||||
- `role="orchestrator"` — retains `delegate_task` so it can spawn its
|
||||
own workers. Gated by `delegation.orchestrator_enabled` (default true)
|
||||
and bounded by `delegation.max_spawn_depth` (default 2).
|
||||
|
||||
Key config knobs (under `delegation:` in `config.yaml`):
|
||||
`max_concurrent_children`, `max_spawn_depth`, `child_timeout_seconds`,
|
||||
`orchestrator_enabled`, `subagent_auto_approve`, `inherit_mcp_toolsets`,
|
||||
`max_iterations`.
|
||||
|
||||
Synchronicity rule: delegate_task is **not** durable. For long-running
|
||||
work that must outlive the current turn, use `cronjob` or
|
||||
`terminal(background=True, notify_on_complete=True)` instead.
|
||||
|
||||
---
|
||||
|
||||
## Curator (skill lifecycle)
|
||||
|
||||
Background skill-maintenance system that tracks usage on agent-created
|
||||
skills and auto-archives stale ones. Users never lose skills; archives
|
||||
go to `~/.hermes/skills/.archive/` and are restorable.
|
||||
|
||||
- **Core:** `agent/curator.py` (review loop, auto-transitions, LLM review
|
||||
prompt) + `agent/curator_backup.py` (pre-run tar.gz snapshots).
|
||||
- **CLI:** `hermes_cli/curator.py` wires `hermes curator <verb>` where
|
||||
verbs are: `status`, `run`, `pause`, `resume`, `pin`, `unpin`,
|
||||
`archive`, `restore`, `prune`, `backup`, `rollback`.
|
||||
- **Telemetry:** `tools/skill_usage.py` owns the sidecar
|
||||
`~/.hermes/skills/.usage.json` — per-skill `use_count`, `view_count`,
|
||||
`patch_count`, `last_activity_at`, `state` (active / stale /
|
||||
archived), `pinned`.
|
||||
|
||||
Invariants:
|
||||
- Curator only touches skills with `created_by: "agent"` provenance —
|
||||
bundled + hub-installed skills are off-limits.
|
||||
- Never deletes; max destructive action is archive.
|
||||
- Pinned skills are exempt from every auto-transition and from the
|
||||
LLM review pass.
|
||||
- `skill_manage(action="delete")` refuses pinned skills; patch/edit/
|
||||
write_file/remove_file go through so the agent can keep improving
|
||||
pinned skills.
|
||||
|
||||
Config section (`curator:` in `config.yaml`):
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup.*`.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/curator.md`.
|
||||
|
||||
---
|
||||
|
||||
## Cron (scheduled jobs)
|
||||
|
||||
`cron/jobs.py` (job store) + `cron/scheduler.py` (tick loop). Agents
|
||||
schedule jobs via the `cronjob` tool; users via `hermes cron <verb>`
|
||||
(`list`, `add`, `edit`, `pause`, `resume`, `run`, `remove`) or the
|
||||
`/cron` slash command.
|
||||
|
||||
Supported schedule formats:
|
||||
- Duration: `"30m"`, `"2h"`, `"1d"`
|
||||
- "every" phrase: `"every 2h"`, `"every monday 9am"`
|
||||
- 5-field cron expression: `"0 9 * * *"`
|
||||
- ISO timestamp (one-shot): `"2026-06-01T09:00:00Z"`
|
||||
|
||||
Per-job fields include `skills` (load specific skills), `model` /
|
||||
`provider` overrides, `script` (pre-run data-collection script whose
|
||||
stdout is injected into the prompt; `no_agent=True` turns the script
|
||||
into the entire job), `context_from` (chain job A's last output into
|
||||
job B's prompt), `workdir` (run in a specific directory with its
|
||||
`AGENTS.md`/`CLAUDE.md` loaded), and multi-platform delivery.
|
||||
|
||||
Hardening invariants:
|
||||
- **3-minute hard interrupt** on cron sessions — runaway agent loops
|
||||
cannot monopolize the scheduler.
|
||||
- Catchup window: half the job's period, clamped to 120s–2h.
|
||||
- Grace window: 120s for one-shot jobs whose fire time was missed.
|
||||
- File lock at `~/.hermes/cron/.tick.lock` prevents duplicate ticks
|
||||
across processes.
|
||||
- Cron sessions pass `skip_memory=True` by default; memory providers
|
||||
intentionally do not run during cron.
|
||||
|
||||
Cron deliveries are **not** mirrored into the target gateway session —
|
||||
they land in their own cron session with a header/footer frame so the
|
||||
main conversation's message-role alternation stays intact.
|
||||
|
||||
---
|
||||
|
||||
## Kanban (multi-agent work queue)
|
||||
|
||||
Durable SQLite-backed board that lets multiple profiles / workers
|
||||
collaborate on shared tasks. Users drive it via `hermes kanban <verb>`;
|
||||
workers spawned by the dispatcher drive it via a dedicated `kanban_*`
|
||||
toolset so their schema footprint is zero when they're not inside a
|
||||
kanban task.
|
||||
|
||||
- **CLI:** `hermes_cli/kanban.py` wires `hermes kanban` with verbs
|
||||
`init`, `create`, `list` (alias `ls`), `show`, `assign`, `link`,
|
||||
`unlink`, `comment`, `complete`, `block`, `unblock`, `archive`,
|
||||
`tail`, plus less-commonly-used `watch`, `stats`, `runs`, `log`,
|
||||
`assignees`, `heartbeat`, `notify-*`, `dispatch`, `daemon`, `gc`.
|
||||
- **Worker toolset:** `tools/kanban_tools.py` exposes `kanban_show`,
|
||||
`kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`,
|
||||
`kanban_create`, `kanban_link` — gated by `HERMES_KANBAN_TASK` so
|
||||
the schema only appears for processes actually running as a worker.
|
||||
- **Dispatcher:** long-lived loop that (default every 60s) reclaims
|
||||
stale claims, promotes ready tasks, atomically claims, and spawns
|
||||
assigned profiles. Runs **inside the gateway** by default via
|
||||
`kanban.dispatch_in_gateway: true`.
|
||||
- **Plugin assets:** `plugins/kanban/dashboard/` (web UI) +
|
||||
`plugins/kanban/systemd/` (`hermes-kanban-dispatcher.service` for
|
||||
standalone dispatcher deployment).
|
||||
|
||||
Isolation model:
|
||||
- **Board** is the hard boundary — workers are spawned with
|
||||
`HERMES_KANBAN_BOARD` pinned in their env so they can't see other
|
||||
boards.
|
||||
- **Tenant** is a soft namespace *within* a board — one specialist
|
||||
fleet can serve multiple businesses with workspace-path + memory-key
|
||||
isolation.
|
||||
- After ~5 consecutive spawn failures on the same task the dispatcher
|
||||
auto-blocks it to prevent spin loops.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/kanban.md`.
|
||||
|
||||
---
|
||||
|
||||
|
||||
+18
-9
@@ -28,10 +28,26 @@ WORKDIR /opt/hermes
|
||||
# ---------- Layer-cached dependency install ----------
|
||||
# Copy only package manifests first so npm install + Playwright are cached
|
||||
# unless the lockfiles themselves change.
|
||||
#
|
||||
# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
|
||||
# because it is referenced as a `file:` workspace dependency from
|
||||
# ui-tui/package.json. Copying the tree up front lets npm resolve the
|
||||
# workspace to real content instead of stopping at a bare package.json.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/package-lock.json web/
|
||||
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
|
||||
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
|
||||
|
||||
# `npm_config_install_links=false` forces npm to install `file:` deps as
|
||||
# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
|
||||
# which defaults to `install-links=true` and installs file deps as *copies*.
|
||||
# The host-side package-lock.json is generated with a newer npm that uses
|
||||
# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
|
||||
# that permanently disagrees with the root lock on the @hermes/ink entry.
|
||||
# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
|
||||
# check on every startup and triggers a runtime `npm install` that then
|
||||
# fails with EACCES (node_modules/ is root-owned from build time).
|
||||
ENV npm_config_install_links=false
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
@@ -45,14 +61,7 @@ COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build && \
|
||||
rm -rf node_modules/@hermes/ink && \
|
||||
rm -rf packages/hermes-ink/node_modules && \
|
||||
cp -R packages/hermes-ink node_modules/@hermes/ink && \
|
||||
npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
|
||||
rm -rf node_modules/@hermes/ink/node_modules/react && \
|
||||
node --input-type=module -e "await import('@hermes/ink')" && \
|
||||
touch .hermes-prebuilt-tui
|
||||
cd ../ui-tui && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
|
||||
</p>
|
||||
|
||||
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
|
||||
@@ -21,7 +22,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
|
||||
<tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
|
||||
<tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
|
||||
<tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Daytona, Singularity, and Modal. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Seven terminal backends — local, Docker, SSH, Singularity, Modal, Daytona, and Vercel Sandbox. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
|
||||
</table>
|
||||
|
||||
|
||||
+186
@@ -0,0 +1,186 @@
|
||||
<p align="center">
|
||||
<img src="assets/banner.png" alt="Hermes Agent" width="100%">
|
||||
</p>
|
||||
|
||||
# Hermes Agent ☤
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
|
||||
</p>
|
||||
|
||||
**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能,在使用中改进技能,主动持久化知识,搜索过往对话,并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行,也可以在 GPU 集群上运行,或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话,而它在云端 VM 上工作。
|
||||
|
||||
支持任意模型——[Nous Portal](https://portal.nousresearch.com)、[OpenRouter](https://openrouter.ai)(200+ 模型)、[NVIDIA NIM](https://build.nvidia.com)(Nemotron)、[小米 MiMo](https://platform.xiaomimimo.com)、[z.ai/GLM](https://z.ai)、[Kimi/Moonshot](https://platform.moonshot.ai)、[MiniMax](https://www.minimax.io)、[Hugging Face](https://huggingface.co)、OpenAI,或自定义端点。使用 `hermes model` 即可切换——无需改代码,无锁定。
|
||||
|
||||
<table>
|
||||
<tr><td><b>真正的终端界面</b></td><td>完整的 TUI,支持多行编辑、斜杠命令自动补全、对话历史、中断重定向和流式工具输出。</td></tr>
|
||||
<tr><td><b>随你所在</b></td><td>Telegram、Discord、Slack、WhatsApp、Signal 和 CLI——全部从单个网关进程运行。语音备忘录转写、跨平台对话连续性。</td></tr>
|
||||
<tr><td><b>闭环学习</b></td><td>代理管理记忆并定期自我提醒。复杂任务后自动创建技能。技能在使用中自我改进。FTS5 会话搜索配合 LLM 摘要实现跨会话回溯。<a href="https://github.com/plastic-labs/honcho">Honcho</a> 辩证式用户建模。兼容 <a href="https://agentskills.io">agentskills.io</a> 开放标准。</td></tr>
|
||||
<tr><td><b>定时自动化</b></td><td>内置 cron 调度器,支持向任何平台投递。日报、夜间备份、周审计——全部用自然语言描述,无人值守运行。</td></tr>
|
||||
<tr><td><b>委派与并行</b></td><td>生成隔离子代理处理并行工作流。编写 Python 脚本通过 RPC 调用工具,将多步管道压缩为零上下文开销的轮次。</td></tr>
|
||||
<tr><td><b>随处运行</b></td><td>六种终端后端——本地、Docker、SSH、Daytona、Singularity 和 Modal。Daytona 和 Modal 提供 Serverless 持久化——代理环境空闲时休眠、按需唤醒,空闲期间几乎零成本。$5 VPS 或 GPU 集群都能跑。</td></tr>
|
||||
<tr><td><b>研究就绪</b></td><td>批量轨迹生成、Atropos RL 环境、轨迹压缩——用于训练下一代工具调用模型。</td></tr>
|
||||
</table>
|
||||
|
||||
---
|
||||
|
||||
## 快速安装
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
支持 Linux、macOS、WSL2 和 Android (Termux)。安装程序会自动处理平台特定的配置。
|
||||
|
||||
> **Android / Termux:** 已测试的手动安装路径请参考 [Termux 指南](https://hermes-agent.nousresearch.com/docs/getting-started/termux)。在 Termux 上,Hermes 会安装精选的 `.[termux]` 扩展,因为完整的 `.[all]` 扩展会拉取 Android 不兼容的语音依赖。
|
||||
>
|
||||
> **Windows:** 原生 Windows 不受支持。请安装 [WSL2](https://learn.microsoft.com/zh-cn/windows/wsl/install) 并运行上述命令。
|
||||
|
||||
安装后:
|
||||
|
||||
```bash
|
||||
source ~/.bashrc # 重新加载 shell(或: source ~/.zshrc)
|
||||
hermes # 开始对话!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 快速入门
|
||||
|
||||
```bash
|
||||
hermes # 交互式 CLI — 开始对话
|
||||
hermes model # 选择 LLM 提供商和模型
|
||||
hermes tools # 配置启用的工具
|
||||
hermes config set # 设置单个配置项
|
||||
hermes gateway # 启动消息网关(Telegram、Discord 等)
|
||||
hermes setup # 运行完整设置向导(一次性配置所有内容)
|
||||
hermes claw migrate # 从 OpenClaw 迁移(如果来自 OpenClaw)
|
||||
hermes update # 更新到最新版本
|
||||
hermes doctor # 诊断问题
|
||||
```
|
||||
|
||||
📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
|
||||
## CLI 与消息平台 快速对照
|
||||
|
||||
Hermes 有两种入口:用 `hermes` 启动终端 UI,或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后,许多斜杠命令在两种界面中通用。
|
||||
|
||||
| 操作 | CLI | 消息平台 |
|
||||
|------|-----|----------|
|
||||
| 开始对话 | `hermes` | 运行 `hermes gateway setup` + `hermes gateway start`,然后给机器人发消息 |
|
||||
| 开始新对话 | `/new` 或 `/reset` | `/new` 或 `/reset` |
|
||||
| 更换模型 | `/model [provider:model]` | `/model [provider:model]` |
|
||||
| 设置人格 | `/personality [name]` | `/personality [name]` |
|
||||
| 重试或撤销上一轮 | `/retry`、`/undo` | `/retry`、`/undo` |
|
||||
| 压缩上下文 / 查看用量 | `/compress`、`/usage`、`/insights [--days N]` | `/compress`、`/usage`、`/insights [days]` |
|
||||
| 浏览技能 | `/skills` 或 `/<skill-name>` | `/skills` 或 `/<skill-name>` |
|
||||
| 中断当前工作 | `Ctrl+C` 或发送新消息 | `/stop` 或发送新消息 |
|
||||
| 平台特定状态 | `/platforms` | `/status`、`/sethome` |
|
||||
|
||||
完整命令列表请参阅 [CLI 指南](https://hermes-agent.nousresearch.com/docs/user-guide/cli) 和 [消息网关指南](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)。
|
||||
|
||||
---
|
||||
|
||||
## 文档
|
||||
|
||||
所有文档位于 **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**:
|
||||
|
||||
| 章节 | 内容 |
|
||||
|------|------|
|
||||
| [快速开始](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | 安装 → 设置 → 2 分钟内开始首次对话 |
|
||||
| [CLI 使用](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | 命令、快捷键、人格、会话 |
|
||||
| [配置](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | 配置文件、提供商、模型、所有选项 |
|
||||
| [消息网关](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram、Discord、Slack、WhatsApp、Signal、Home Assistant |
|
||||
| [安全](https://hermes-agent.nousresearch.com/docs/user-guide/security) | 命令审批、DM 配对、容器隔离 |
|
||||
| [工具与工具集](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ 工具、工具集系统、终端后端 |
|
||||
| [技能系统](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | 过程记忆、技能中心、创建技能 |
|
||||
| [记忆](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | 持久记忆、用户画像、最佳实践 |
|
||||
| [MCP 集成](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | 连接任意 MCP 服务器扩展能力 |
|
||||
| [定时调度](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | 定时任务与平台投递 |
|
||||
| [上下文文件](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | 影响每次对话的项目上下文 |
|
||||
| [架构](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | 项目结构、代理循环、关键类 |
|
||||
| [贡献](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | 开发设置、PR 流程、代码风格 |
|
||||
| [CLI 参考](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | 所有命令和标志 |
|
||||
| [环境变量](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | 完整环境变量参考 |
|
||||
|
||||
---
|
||||
|
||||
## 从 OpenClaw 迁移
|
||||
|
||||
如果你来自 OpenClaw,Hermes 可以自动导入你的设置、记忆、技能和 API 密钥。
|
||||
|
||||
**首次安装时:** 安装向导(`hermes setup`)会自动检测 `~/.openclaw` 并在配置开始前提供迁移选项。
|
||||
|
||||
**安装后任意时间:**
|
||||
|
||||
```bash
|
||||
hermes claw migrate # 交互式迁移(完整预设)
|
||||
hermes claw migrate --dry-run # 预览将要迁移的内容
|
||||
hermes claw migrate --preset user-data # 仅迁移用户数据,不含密钥
|
||||
hermes claw migrate --overwrite # 覆盖已有冲突
|
||||
```
|
||||
|
||||
导入内容:
|
||||
- **SOUL.md** — 人格文件
|
||||
- **记忆** — MEMORY.md 和 USER.md 条目
|
||||
- **技能** — 用户创建的技能 → `~/.hermes/skills/openclaw-imports/`
|
||||
- **命令白名单** — 审批模式
|
||||
- **消息设置** — 平台配置、允许用户、工作目录
|
||||
- **API 密钥** — 白名单中的密钥(Telegram、OpenRouter、OpenAI、Anthropic、ElevenLabs)
|
||||
- **TTS 资产** — 工作区音频文件
|
||||
- **工作区指令** — AGENTS.md(使用 `--workspace-target`)
|
||||
|
||||
使用 `hermes claw migrate --help` 查看所有选项,或使用 `openclaw-migration` 技能进行交互式代理引导迁移(含干运行预览)。
|
||||
|
||||
---
|
||||
|
||||
## 贡献
|
||||
|
||||
欢迎贡献!请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。
|
||||
|
||||
贡献者快速开始——克隆并使用 `setup-hermes.sh`:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
./setup-hermes.sh # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
|
||||
./hermes # 自动检测 venv,无需先 source
|
||||
```
|
||||
|
||||
手动安装(等效于上述命令):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
> **RL 训练(可选):** 如需参与 RL/Tinker-Atropos 集成开发:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
## 社区
|
||||
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [技能中心](https://agentskills.io)
|
||||
- 🐛 [问题反馈](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [讨论区](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — 社区微信桥接:在同一微信账号上运行 Hermes Agent 和 OpenClaw。
|
||||
|
||||
---
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT — 详见 [LICENSE](LICENSE)。
|
||||
|
||||
由 [Nous Research](https://nousresearch.com) 构建。
|
||||
@@ -0,0 +1,505 @@
|
||||
# Hermes Agent v0.12.0 (v2026.4.30)
|
||||
|
||||
**Release Date:** April 30, 2026
|
||||
**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
|
||||
|
||||
> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, a 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
|
||||
- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
|
||||
- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
|
||||
- **Four more new inference providers** — **GMI Cloud** (first-class, salvage of #11955 — @isaachuangGMICLOUD), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
|
||||
- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
|
||||
|
||||
- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
|
||||
|
||||
- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
|
||||
|
||||
- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
|
||||
- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
|
||||
- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
|
||||
- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
|
||||
- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
|
||||
- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
|
||||
- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
|
||||
- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
|
||||
- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
|
||||
- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
|
||||
---
|
||||
|
||||
## 🧠 Autonomous Curator & Self-Improvement Loop
|
||||
|
||||
### Curator — autonomous skill maintenance
|
||||
- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
|
||||
- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
|
||||
- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
|
||||
- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
|
||||
- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
|
||||
- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
|
||||
- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
|
||||
- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
|
||||
|
||||
### Self-improvement loop (background review fork)
|
||||
- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
|
||||
- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
|
||||
- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
|
||||
- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill integrations — newly bundled or promoted
|
||||
- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
|
||||
- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
|
||||
- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
|
||||
- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
|
||||
- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
|
||||
- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
|
||||
- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
|
||||
|
||||
### Skills UX
|
||||
- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
|
||||
- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
|
||||
- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
|
||||
- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
|
||||
- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
|
||||
- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
|
||||
- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
|
||||
- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
|
||||
#### New providers
|
||||
- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 — @isaachuangGMICLOUD) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
|
||||
- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
|
||||
- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 — @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
|
||||
- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
#### Model catalog
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
|
||||
- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
|
||||
- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
|
||||
- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
|
||||
|
||||
#### Model configuration
|
||||
- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
|
||||
- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
|
||||
- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
|
||||
- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
|
||||
- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
|
||||
- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
|
||||
- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
|
||||
- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
|
||||
- Fix: ordering fix in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
|
||||
- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
|
||||
- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
|
||||
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
|
||||
### Compression
|
||||
- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
|
||||
- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
|
||||
- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
|
||||
- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
|
||||
|
||||
### Session, Memory & State
|
||||
- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
|
||||
- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
|
||||
- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
|
||||
- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
|
||||
- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
|
||||
- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
|
||||
- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
|
||||
- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
|
||||
- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
|
||||
- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
### Auxiliary models
|
||||
- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
|
||||
- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
|
||||
- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **Microsoft Teams (19th platform)** — as a plugin, + xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
|
||||
### Pluggable Gateway Platforms
|
||||
- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
|
||||
### Telegram
|
||||
- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
|
||||
- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
|
||||
- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
|
||||
- Document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Discord
|
||||
- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
|
||||
### Slack
|
||||
- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
|
||||
- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
|
||||
- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
|
||||
|
||||
### Signal
|
||||
- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Feishu / Mattermost / Email / Signal
|
||||
- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Gateway Core
|
||||
- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
|
||||
- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin-first architecture
|
||||
- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
|
||||
- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
|
||||
### Browser
|
||||
- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
|
||||
- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
|
||||
|
||||
### Execute code / Terminal
|
||||
- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
|
||||
- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
|
||||
- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
|
||||
- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
|
||||
- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
|
||||
|
||||
### Image generation
|
||||
- See Provider section for updates; no new image providers this window.
|
||||
|
||||
### TTS / Voice
|
||||
- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
|
||||
- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
|
||||
### Cron
|
||||
- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
|
||||
- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
|
||||
- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
|
||||
- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
|
||||
### Web search
|
||||
- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
|
||||
|
||||
### Maps
|
||||
- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
|
||||
|
||||
### Approvals
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
### ACP
|
||||
- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
|
||||
### API Server
|
||||
- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
|
||||
- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
|
||||
|
||||
### Nix
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
|
||||
- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
|
||||
- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
|
||||
- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ TUI
|
||||
|
||||
### New features
|
||||
- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
|
||||
- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
|
||||
- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
|
||||
- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
|
||||
- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
|
||||
- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
|
||||
- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
|
||||
- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
|
||||
- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
|
||||
- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
|
||||
- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
|
||||
|
||||
### Fixes
|
||||
- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
|
||||
- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
### New commands
|
||||
- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
|
||||
- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
|
||||
- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
|
||||
- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
|
||||
- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
|
||||
- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
|
||||
|
||||
### Setup / onboarding
|
||||
- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
|
||||
- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
|
||||
- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
|
||||
- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
|
||||
|
||||
### Update / backup
|
||||
- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
|
||||
- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
|
||||
- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
|
||||
- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
|
||||
- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
|
||||
- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
|
||||
|
||||
### Slash-command housekeeping
|
||||
- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
|
||||
|
||||
### OpenClaw migration (for folks coming from OpenClaw)
|
||||
- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
|
||||
- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
|
||||
- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
|
||||
- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
|
||||
- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
|
||||
- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
|
||||
- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
|
||||
- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- Fix: replace all buttons for design system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
|
||||
- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
|
||||
- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
|
||||
- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
|
||||
- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
|
||||
- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
This window includes 360 `fix:` PRs. Selected highlights from across the stack:
|
||||
|
||||
- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Hindsight configurable `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
|
||||
- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
|
||||
- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
|
||||
- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
|
||||
|
||||
The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- **Document pin also blocking `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
|
||||
- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
|
||||
- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
|
||||
- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
|
||||
---
|
||||
|
||||
## ⚖️ Removed / Reverted
|
||||
|
||||
- **Kanban multi-profile collaboration board** — landed in #16081, reverted in ([#16098](https://github.com/NousResearch/hermes-agent/pull/16098)) while the design is reworked
|
||||
- **computer-use cua-driver** — 3 preparatory PRs landed then were reverted in ([#16927](https://github.com/NousResearch/hermes-agent/pull/16927))
|
||||
- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
|
||||
- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count since v0.11.0)
|
||||
|
||||
- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
|
||||
- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
|
||||
- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
|
||||
- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
|
||||
- **@ethernet8023** — 4 PRs
|
||||
- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
|
||||
- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
|
||||
- **@vominh1919** — 2 PRs
|
||||
- **@stephenschoettler** — 2 PRs
|
||||
- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
|
||||
- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
|
||||
- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
|
||||
- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
|
||||
- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
|
||||
- **@y0shua1ee** — curator `use` activity fix (#17953)
|
||||
|
||||
### Also contributing
|
||||
Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
|
||||
|
||||
### All Contributors (alphabetical, excluding @teknium1)
|
||||
|
||||
@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
|
||||
@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
|
||||
@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
|
||||
@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
|
||||
@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
|
||||
@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
|
||||
@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
|
||||
@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
|
||||
@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
|
||||
@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
|
||||
@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
|
||||
@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
|
||||
@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
|
||||
@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
|
||||
@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
|
||||
@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
|
||||
@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
|
||||
@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
|
||||
@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
|
||||
@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
|
||||
@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
|
||||
@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
|
||||
@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
|
||||
@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
|
||||
@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
|
||||
@ztexydt-cqh.
|
||||
|
||||
Also: @Siddharth Balyan, @YuShu.
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)
|
||||
+381
-30
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
@@ -47,6 +48,7 @@ from acp.schema import (
|
||||
TextContentBlock,
|
||||
UnstructuredCommandInput,
|
||||
Usage,
|
||||
UsageUpdate,
|
||||
UserMessageChunk,
|
||||
)
|
||||
|
||||
@@ -65,6 +67,7 @@ from acp_adapter.events import (
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
|
||||
from acp_adapter.tools import build_tool_complete, build_tool_start
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -164,6 +167,8 @@ class HermesACPAgent(acp.Agent):
|
||||
"context": "Show conversation context info",
|
||||
"reset": "Clear conversation history",
|
||||
"compact": "Compress conversation context",
|
||||
"steer": "Inject guidance into the currently running agent turn",
|
||||
"queue": "Queue a prompt to run after the current turn finishes",
|
||||
"version": "Show Hermes version",
|
||||
}
|
||||
|
||||
@@ -193,6 +198,16 @@ class HermesACPAgent(acp.Agent):
|
||||
"name": "compact",
|
||||
"description": "Compress conversation context",
|
||||
},
|
||||
{
|
||||
"name": "steer",
|
||||
"description": "Inject guidance into the currently running agent turn",
|
||||
"input_hint": "guidance for the active turn",
|
||||
},
|
||||
{
|
||||
"name": "queue",
|
||||
"description": "Queue a prompt to run after the current turn finishes",
|
||||
"input_hint": "prompt to run next",
|
||||
},
|
||||
{
|
||||
"name": "version",
|
||||
"description": "Show Hermes version",
|
||||
@@ -303,6 +318,66 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
return target_provider, new_model
|
||||
|
||||
@staticmethod
|
||||
def _build_usage_update(state: SessionState) -> UsageUpdate | None:
|
||||
"""Build ACP native context-usage data for clients like Zed.
|
||||
|
||||
Zed's circular context indicator is driven by ACP ``usage_update``
|
||||
session updates: ``size`` is the model context window and ``used`` is
|
||||
the current request pressure. Hermes estimates ``used`` from the same
|
||||
buckets it sends to providers: system prompt, conversation history, and
|
||||
tool schemas.
|
||||
"""
|
||||
agent = state.agent
|
||||
compressor = getattr(agent, "context_compressor", None)
|
||||
size = int(getattr(compressor, "context_length", 0) or 0)
|
||||
if size <= 0:
|
||||
return None
|
||||
|
||||
try:
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
used = estimate_request_tokens_rough(
|
||||
state.history,
|
||||
system_prompt=getattr(agent, "_cached_system_prompt", "") or "",
|
||||
tools=getattr(agent, "tools", None) or None,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Could not estimate ACP native context usage", exc_info=True)
|
||||
used = int(getattr(compressor, "last_prompt_tokens", 0) or 0)
|
||||
|
||||
return UsageUpdate(
|
||||
session_update="usage_update",
|
||||
size=max(size, 0),
|
||||
used=max(used, 0),
|
||||
)
|
||||
|
||||
async def _send_usage_update(self, state: SessionState) -> None:
|
||||
"""Send ACP native context usage to the connected client."""
|
||||
if not self._conn:
|
||||
return
|
||||
update = self._build_usage_update(state)
|
||||
if update is None:
|
||||
return
|
||||
try:
|
||||
await self._conn.session_update(
|
||||
session_id=state.session_id,
|
||||
update=update,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to send ACP usage update for session %s",
|
||||
state.session_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def _schedule_usage_update(self, state: SessionState) -> None:
|
||||
"""Schedule native context indicator refresh after ACP responses."""
|
||||
if not self._conn:
|
||||
return
|
||||
loop = asyncio.get_running_loop()
|
||||
loop.call_soon(asyncio.create_task, self._send_usage_update(state))
|
||||
|
||||
async def _register_session_mcp_servers(
|
||||
self,
|
||||
state: SessionState,
|
||||
@@ -473,37 +548,99 @@ class HermesACPAgent(acp.Agent):
|
||||
)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
||||
"""Extract function name/arguments from an OpenAI-style tool_call."""
|
||||
function = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
|
||||
name = str(function.get("name") or tool_call.get("name") or "unknown_tool")
|
||||
raw_args = function.get("arguments") or tool_call.get("arguments") or tool_call.get("args") or {}
|
||||
if isinstance(raw_args, str):
|
||||
try:
|
||||
parsed = json.loads(raw_args)
|
||||
except Exception:
|
||||
parsed = {"raw": raw_args}
|
||||
raw_args = parsed
|
||||
if not isinstance(raw_args, dict):
|
||||
raw_args = {}
|
||||
return name, raw_args
|
||||
|
||||
@staticmethod
|
||||
def _history_tool_call_id(tool_call: dict[str, Any]) -> str:
|
||||
"""Return the stable provider tool call id for ACP history replay."""
|
||||
return str(
|
||||
tool_call.get("id")
|
||||
or tool_call.get("call_id")
|
||||
or tool_call.get("tool_call_id")
|
||||
or ""
|
||||
).strip()
|
||||
|
||||
async def _replay_session_history(self, state: SessionState) -> None:
|
||||
"""Send persisted user/assistant history to clients during session/load.
|
||||
|
||||
Zed's ACP history UI calls ``session/load`` after the user picks an item
|
||||
from the Agents sidebar. The agent must then replay the full conversation
|
||||
as ``user_message_chunk`` / ``agent_message_chunk`` notifications; merely
|
||||
restoring server-side state makes Hermes remember context, but leaves the
|
||||
editor looking like a clean thread.
|
||||
as user/assistant chunks plus reconstructed tool-call start/completion
|
||||
notifications; merely restoring server-side state makes Hermes remember
|
||||
context, but leaves the editor looking like a clean thread.
|
||||
"""
|
||||
if not self._conn or not state.history:
|
||||
return
|
||||
|
||||
for message in state.history:
|
||||
role = str(message.get("role") or "")
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
text = self._history_message_text(message)
|
||||
if not text:
|
||||
continue
|
||||
update = self._history_message_update(role=role, text=text)
|
||||
if update is None:
|
||||
continue
|
||||
active_tool_calls: dict[str, tuple[str, dict[str, Any]]] = {}
|
||||
|
||||
async def _send(update: Any) -> bool:
|
||||
try:
|
||||
await self._conn.session_update(session_id=state.session_id, update=update)
|
||||
return True
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to replay ACP history for session %s",
|
||||
state.session_id,
|
||||
exc_info=True,
|
||||
)
|
||||
return
|
||||
return False
|
||||
|
||||
for message in state.history:
|
||||
role = str(message.get("role") or "")
|
||||
|
||||
if role in {"user", "assistant"}:
|
||||
text = self._history_message_text(message)
|
||||
if text:
|
||||
update = self._history_message_update(role=role, text=text)
|
||||
if update is not None and not await _send(update):
|
||||
return
|
||||
|
||||
if role == "assistant" and isinstance(message.get("tool_calls"), list):
|
||||
for tool_call in message["tool_calls"]:
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
tool_call_id = self._history_tool_call_id(tool_call)
|
||||
if not tool_call_id:
|
||||
continue
|
||||
tool_name, args = self._history_tool_call_name_args(tool_call)
|
||||
active_tool_calls[tool_call_id] = (tool_name, args)
|
||||
if not await _send(build_tool_start(tool_call_id, tool_name, args)):
|
||||
return
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
tool_call_id = str(message.get("tool_call_id") or "").strip()
|
||||
tool_name = str(message.get("tool_name") or "").strip()
|
||||
function_args: dict[str, Any] | None = None
|
||||
if tool_call_id in active_tool_calls:
|
||||
tool_name, function_args = active_tool_calls.pop(tool_call_id)
|
||||
if not tool_call_id or not tool_name:
|
||||
continue
|
||||
result = message.get("content")
|
||||
if not await _send(
|
||||
build_tool_complete(
|
||||
tool_call_id,
|
||||
tool_name,
|
||||
result=result if isinstance(result, str) else None,
|
||||
function_args=function_args,
|
||||
)
|
||||
):
|
||||
return
|
||||
|
||||
async def new_session(
|
||||
self,
|
||||
@@ -515,11 +652,24 @@ class HermesACPAgent(acp.Agent):
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return NewSessionResponse(
|
||||
session_id=state.session_id,
|
||||
models=self._build_model_state(state),
|
||||
)
|
||||
|
||||
def _schedule_history_replay(self, state: SessionState) -> None:
|
||||
"""Replay persisted history after session/load or session/resume returns.
|
||||
|
||||
Zed only attaches streamed transcript/tool updates once the load/resume
|
||||
response has completed. Sending replay notifications while the request is
|
||||
still in-flight can make the server look correct in logs while the editor
|
||||
drops or fails to attach the tool-call history.
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
replay_coro = self._replay_session_history(state)
|
||||
loop.call_soon(asyncio.create_task, replay_coro)
|
||||
|
||||
async def load_session(
|
||||
self,
|
||||
cwd: str,
|
||||
@@ -533,8 +683,9 @@ class HermesACPAgent(acp.Agent):
|
||||
return None
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Loaded session %s", session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_history_replay(state)
|
||||
self._schedule_available_commands_update(session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return LoadSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
async def resume_session(
|
||||
@@ -550,13 +701,17 @@ class HermesACPAgent(acp.Agent):
|
||||
state = self.session_manager.create_session(cwd=cwd)
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Resumed session %s", state.session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_history_replay(state)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return ResumeSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
async def cancel(self, session_id: str, **kwargs: Any) -> None:
|
||||
state = self.session_manager.get_session(session_id)
|
||||
if state and state.cancel_event:
|
||||
with state.runtime_lock:
|
||||
if state.is_running and state.current_prompt_text:
|
||||
state.interrupted_prompt_text = state.current_prompt_text
|
||||
state.cancel_event.set()
|
||||
try:
|
||||
if getattr(state, "agent", None) and hasattr(state.agent, "interrupt"):
|
||||
@@ -654,6 +809,39 @@ class HermesACPAgent(acp.Agent):
|
||||
if not has_content:
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
# /steer on an idle session has no in-flight tool call to inject into.
|
||||
# Rewrite it so the payload runs as a normal user prompt, matching the
|
||||
# gateway's behavior (gateway/run.py ~L4898). Two sub-cases:
|
||||
# 1. Zed-interrupt salvage — a prior prompt was cancelled by the
|
||||
# client right before /steer arrived; replay it with the steer
|
||||
# text attached as explicit correction/guidance so the user's
|
||||
# in-flight work isn't lost.
|
||||
# 2. Plain idle — no prior work to salvage; just run the steer
|
||||
# payload as a regular prompt. Without this, _cmd_steer would
|
||||
# silently append to state.queued_prompts and respond with
|
||||
# "No active turn — queued for the next turn", which looks like
|
||||
# /queue even though the user never typed /queue.
|
||||
if isinstance(user_content, str) and user_text.startswith("/steer"):
|
||||
steer_text = user_text.split(maxsplit=1)[1].strip() if len(user_text.split(maxsplit=1)) > 1 else ""
|
||||
interrupted_prompt = ""
|
||||
rewrite_idle = False
|
||||
with state.runtime_lock:
|
||||
if not state.is_running and steer_text:
|
||||
if state.interrupted_prompt_text:
|
||||
interrupted_prompt = state.interrupted_prompt_text
|
||||
state.interrupted_prompt_text = ""
|
||||
else:
|
||||
rewrite_idle = True
|
||||
if interrupted_prompt:
|
||||
user_text = (
|
||||
f"{interrupted_prompt}\n\n"
|
||||
f"User correction/guidance after interrupt: {steer_text}"
|
||||
)
|
||||
user_content = user_text
|
||||
elif rewrite_idle:
|
||||
user_text = steer_text
|
||||
user_content = steer_text
|
||||
|
||||
# Intercept slash commands — handle locally without calling the LLM.
|
||||
# Slash commands are text-only; if the client included images/resources,
|
||||
# send the whole multimodal prompt to the agent instead of treating it as
|
||||
@@ -664,8 +852,27 @@ class HermesACPAgent(acp.Agent):
|
||||
if self._conn:
|
||||
update = acp.update_agent_message_text(response_text)
|
||||
await self._conn.session_update(session_id, update)
|
||||
await self._send_usage_update(state)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
# If Zed sends another regular prompt while the same ACP session is
|
||||
# still running, queue it instead of racing two AIAgent loops against
|
||||
# the same state.history. /steer and /queue are handled above and can
|
||||
# land immediately.
|
||||
with state.runtime_lock:
|
||||
if state.is_running:
|
||||
queued_text = user_text or "[Image attachment]"
|
||||
state.queued_prompts.append(queued_text)
|
||||
depth = len(state.queued_prompts)
|
||||
if self._conn:
|
||||
update = acp.update_agent_message_text(
|
||||
f"Queued for the next turn. ({depth} queued)"
|
||||
)
|
||||
await self._conn.session_update(session_id, update)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
state.is_running = True
|
||||
state.current_prompt_text = user_text or "[Image attachment]"
|
||||
|
||||
logger.info("Prompt on session %s: %s", session_id, user_text[:100])
|
||||
|
||||
conn = self._conn
|
||||
@@ -678,24 +885,37 @@ class HermesACPAgent(acp.Agent):
|
||||
tool_call_meta: dict[str, dict[str, Any]] = {}
|
||||
previous_approval_cb = None
|
||||
|
||||
streamed_message = False
|
||||
|
||||
if conn:
|
||||
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
|
||||
thinking_cb = make_thinking_cb(conn, session_id, loop)
|
||||
reasoning_cb = make_thinking_cb(conn, session_id, loop)
|
||||
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
|
||||
message_cb = make_message_cb(conn, session_id, loop)
|
||||
|
||||
def stream_delta_cb(text: str) -> None:
|
||||
nonlocal streamed_message
|
||||
if text:
|
||||
streamed_message = True
|
||||
message_cb(text)
|
||||
|
||||
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
|
||||
else:
|
||||
tool_progress_cb = None
|
||||
thinking_cb = None
|
||||
reasoning_cb = None
|
||||
step_cb = None
|
||||
message_cb = None
|
||||
stream_delta_cb = None
|
||||
approval_cb = None
|
||||
|
||||
agent = state.agent
|
||||
agent.tool_progress_callback = tool_progress_cb
|
||||
agent.thinking_callback = thinking_cb
|
||||
# ACP thought panes should not receive Hermes' local kawaii waiting/status
|
||||
# updates. Route provider/model reasoning deltas instead; if the provider
|
||||
# emits no reasoning, Zed should not get a fake "thinking" accordion.
|
||||
agent.thinking_callback = None
|
||||
agent.reasoning_callback = reasoning_cb
|
||||
agent.step_callback = step_cb
|
||||
agent.message_callback = message_cb
|
||||
agent.stream_delta_callback = stream_delta_cb
|
||||
|
||||
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
|
||||
# Set it INSIDE _run_agent so the TLS write happens in the executor
|
||||
@@ -777,6 +997,9 @@ class HermesACPAgent(acp.Agent):
|
||||
result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
|
||||
except Exception:
|
||||
logger.exception("Executor error for session %s", session_id)
|
||||
with state.runtime_lock:
|
||||
state.is_running = False
|
||||
state.current_prompt_text = ""
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
if result.get("messages"):
|
||||
@@ -798,10 +1021,32 @@ class HermesACPAgent(acp.Agent):
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
|
||||
if final_response and conn:
|
||||
if final_response and conn and not streamed_message:
|
||||
update = acp.update_agent_message_text(final_response)
|
||||
await conn.session_update(session_id, update)
|
||||
|
||||
# Mark this turn idle before draining queued work so recursive prompt()
|
||||
# calls can acquire the session. Queued turns are intentionally run as
|
||||
# normal follow-up user prompts, preserving role alternation and history.
|
||||
with state.runtime_lock:
|
||||
state.is_running = False
|
||||
state.current_prompt_text = ""
|
||||
|
||||
while True:
|
||||
with state.runtime_lock:
|
||||
if not state.queued_prompts:
|
||||
break
|
||||
next_prompt = state.queued_prompts.pop(0)
|
||||
if conn:
|
||||
await conn.session_update(
|
||||
session_id,
|
||||
acp.update_user_message_text(next_prompt),
|
||||
)
|
||||
await self.prompt(
|
||||
prompt=[TextContentBlock(type="text", text=next_prompt)],
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
usage = None
|
||||
if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
|
||||
usage = Usage(
|
||||
@@ -812,6 +1057,8 @@ class HermesACPAgent(acp.Agent):
|
||||
cached_read_tokens=result.get("cache_read_tokens"),
|
||||
)
|
||||
|
||||
await self._send_usage_update(state)
|
||||
|
||||
stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
|
||||
return PromptResponse(stop_reason=stop_reason, usage=usage)
|
||||
|
||||
@@ -879,6 +1126,8 @@ class HermesACPAgent(acp.Agent):
|
||||
"context": self._cmd_context,
|
||||
"reset": self._cmd_reset,
|
||||
"compact": self._cmd_compact,
|
||||
"steer": self._cmd_steer,
|
||||
"queue": self._cmd_queue,
|
||||
"version": self._cmd_version,
|
||||
}.get(cmd)
|
||||
|
||||
@@ -942,22 +1191,84 @@ class HermesACPAgent(acp.Agent):
|
||||
return f"Could not list tools: {e}"
|
||||
|
||||
def _cmd_context(self, args: str, state: SessionState) -> str:
|
||||
"""Show ACP session context pressure and compression guidance."""
|
||||
n_messages = len(state.history)
|
||||
if n_messages == 0:
|
||||
return "Conversation is empty (no messages yet)."
|
||||
# Count by role
|
||||
|
||||
# Count by role.
|
||||
roles: dict[str, int] = {}
|
||||
for msg in state.history:
|
||||
role = msg.get("role", "unknown")
|
||||
roles[role] = roles.get(role, 0) + 1
|
||||
|
||||
agent = state.agent
|
||||
model = state.model or getattr(agent, "model", "")
|
||||
provider = getattr(agent, "provider", None) or "auto"
|
||||
compressor = getattr(agent, "context_compressor", None)
|
||||
context_length = int(getattr(compressor, "context_length", 0) or 0)
|
||||
threshold_tokens = int(getattr(compressor, "threshold_tokens", 0) or 0)
|
||||
|
||||
try:
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
system_prompt = getattr(agent, "_cached_system_prompt", "") or ""
|
||||
tools = getattr(agent, "tools", None) or None
|
||||
approx_tokens = estimate_request_tokens_rough(
|
||||
state.history,
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Could not estimate ACP context usage", exc_info=True)
|
||||
approx_tokens = 0
|
||||
|
||||
if threshold_tokens <= 0 and context_length > 0:
|
||||
threshold_tokens = int(context_length * 0.80)
|
||||
|
||||
lines = [
|
||||
f"Conversation: {n_messages} messages",
|
||||
f"Conversation: {n_messages} messages"
|
||||
if n_messages
|
||||
else "Conversation is empty (no messages yet).",
|
||||
f" user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
|
||||
f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
|
||||
]
|
||||
model = state.model or getattr(state.agent, "model", "")
|
||||
if model:
|
||||
lines.append(f"Model: {model}")
|
||||
lines.append(f"Provider: {provider}")
|
||||
|
||||
if approx_tokens > 0:
|
||||
if context_length > 0:
|
||||
usage_pct = (approx_tokens / context_length) * 100
|
||||
lines.append(
|
||||
f"Context usage: ~{approx_tokens:,} / {context_length:,} tokens ({usage_pct:.1f}%)"
|
||||
)
|
||||
else:
|
||||
lines.append(f"Context usage: ~{approx_tokens:,} tokens")
|
||||
|
||||
if threshold_tokens > 0:
|
||||
if approx_tokens > 0:
|
||||
threshold_pct = (threshold_tokens / context_length) * 100 if context_length > 0 else 0
|
||||
remaining = max(threshold_tokens - approx_tokens, 0)
|
||||
if approx_tokens >= threshold_tokens:
|
||||
lines.append(
|
||||
f"Compression: due now (threshold ~{threshold_tokens:,}"
|
||||
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
|
||||
+ "). Run /compact."
|
||||
)
|
||||
else:
|
||||
lines.append(
|
||||
f"Compression: ~{remaining:,} tokens until threshold "
|
||||
f"(~{threshold_tokens:,}"
|
||||
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
|
||||
+ ")."
|
||||
)
|
||||
else:
|
||||
lines.append(f"Compression threshold: ~{threshold_tokens:,} tokens")
|
||||
|
||||
if getattr(agent, "compression_enabled", True) is False:
|
||||
lines.append("Compression is disabled for this agent.")
|
||||
else:
|
||||
lines.append("Tip: run /compact to compress manually before the threshold.")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _cmd_reset(self, args: str, state: SessionState) -> str:
|
||||
@@ -975,10 +1286,16 @@ class HermesACPAgent(acp.Agent):
|
||||
if not hasattr(agent, "_compress_context"):
|
||||
return "Context compression not available for this agent."
|
||||
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
original_count = len(state.history)
|
||||
approx_tokens = estimate_messages_tokens_rough(state.history)
|
||||
# Include system prompt + tool schemas so the figure reflects real
|
||||
# request pressure, not a transcript-only underestimate (#6217).
|
||||
_sys_prompt = getattr(agent, "_cached_system_prompt", "") or ""
|
||||
_tools = getattr(agent, "tools", None) or None
|
||||
approx_tokens = estimate_request_tokens_rough(
|
||||
state.history, system_prompt=_sys_prompt, tools=_tools
|
||||
)
|
||||
original_session_db = getattr(agent, "_session_db", None)
|
||||
|
||||
try:
|
||||
@@ -998,7 +1315,13 @@ class HermesACPAgent(acp.Agent):
|
||||
self.session_manager.save_session(state.session_id)
|
||||
|
||||
new_count = len(state.history)
|
||||
new_tokens = estimate_messages_tokens_rough(state.history)
|
||||
_sys_prompt_after = getattr(agent, "_cached_system_prompt", "") or _sys_prompt
|
||||
_tools_after = getattr(agent, "tools", None) or _tools
|
||||
new_tokens = estimate_request_tokens_rough(
|
||||
state.history,
|
||||
system_prompt=_sys_prompt_after,
|
||||
tools=_tools_after,
|
||||
)
|
||||
return (
|
||||
f"Context compressed: {original_count} -> {new_count} messages\n"
|
||||
f"~{approx_tokens:,} -> ~{new_tokens:,} tokens"
|
||||
@@ -1006,6 +1329,34 @@ class HermesACPAgent(acp.Agent):
|
||||
except Exception as e:
|
||||
return f"Compression failed: {e}"
|
||||
|
||||
def _cmd_steer(self, args: str, state: SessionState) -> str:
|
||||
steer_text = args.strip()
|
||||
if not steer_text:
|
||||
return "Usage: /steer <guidance>"
|
||||
|
||||
if state.is_running and hasattr(state.agent, "steer"):
|
||||
try:
|
||||
if state.agent.steer(steer_text):
|
||||
preview = steer_text[:80] + ("..." if len(steer_text) > 80 else "")
|
||||
return f"⏩ Steer queued for the active turn: {preview}"
|
||||
except Exception as exc:
|
||||
logger.warning("ACP steer failed for session %s: %s", state.session_id, exc)
|
||||
return f"⚠️ Steer failed: {exc}"
|
||||
|
||||
with state.runtime_lock:
|
||||
state.queued_prompts.append(steer_text)
|
||||
depth = len(state.queued_prompts)
|
||||
return f"No active turn — queued for the next turn. ({depth} queued)"
|
||||
|
||||
def _cmd_queue(self, args: str, state: SessionState) -> str:
|
||||
queued_text = args.strip()
|
||||
if not queued_text:
|
||||
return "Usage: /queue <prompt>"
|
||||
with state.runtime_lock:
|
||||
state.queued_prompts.append(queued_text)
|
||||
depth = len(state.queued_prompts)
|
||||
return f"Queued for the next turn. ({depth} queued)"
|
||||
|
||||
def _cmd_version(self, args: str, state: SessionState) -> str:
|
||||
return f"Hermes Agent v{HERMES_VERSION}"
|
||||
|
||||
|
||||
+50
-18
@@ -26,6 +26,33 @@ from typing import Any, Dict, List, Optional
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _win_path_to_wsl(path: str) -> str | None:
|
||||
"""Convert a Windows drive path to its WSL /mnt/<drive>/... equivalent."""
|
||||
match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
|
||||
if not match:
|
||||
return None
|
||||
drive = match.group(1).lower()
|
||||
tail = match.group(2).replace("\\", "/")
|
||||
return f"/mnt/{drive}/{tail}"
|
||||
|
||||
|
||||
def _translate_acp_cwd(cwd: str) -> str:
|
||||
"""Translate Windows ACP cwd values when Hermes itself is running in WSL.
|
||||
|
||||
Windows ACP clients can launch ``hermes acp`` inside WSL while still sending
|
||||
editor workspaces as Windows drive paths such as ``E:\\Projects``. Store
|
||||
and execute against the WSL mount path so agents, tools, and persisted ACP
|
||||
sessions all agree on the usable workspace. Native Linux/macOS keeps the
|
||||
original cwd unchanged.
|
||||
"""
|
||||
from hermes_constants import is_wsl
|
||||
|
||||
if not is_wsl():
|
||||
return cwd
|
||||
translated = _win_path_to_wsl(str(cwd))
|
||||
return translated if translated is not None else cwd
|
||||
|
||||
|
||||
def _normalize_cwd_for_compare(cwd: str | None) -> str:
|
||||
raw = str(cwd or ".").strip()
|
||||
if not raw:
|
||||
@@ -34,11 +61,9 @@ def _normalize_cwd_for_compare(cwd: str | None) -> str:
|
||||
|
||||
# Normalize Windows drive paths into the equivalent WSL mount form so
|
||||
# ACP history filters match the same workspace across Windows and WSL.
|
||||
match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
|
||||
if match:
|
||||
drive = match.group(1).lower()
|
||||
tail = match.group(2).replace("\\", "/")
|
||||
expanded = f"/mnt/{drive}/{tail}"
|
||||
translated = _win_path_to_wsl(expanded)
|
||||
if translated is not None:
|
||||
expanded = translated
|
||||
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
|
||||
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
|
||||
|
||||
@@ -96,12 +121,18 @@ def _acp_stderr_print(*args, **kwargs) -> None:
|
||||
|
||||
|
||||
def _register_task_cwd(task_id: str, cwd: str) -> None:
|
||||
"""Bind a task/session id to the editor's working directory for tools."""
|
||||
"""Bind a task/session id to the editor's working directory for tools.
|
||||
|
||||
Zed can launch Hermes from a Windows workspace while the ACP process runs
|
||||
inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
|
||||
local tools need the WSL mount equivalent or subprocess creation fails
|
||||
before the command can run.
|
||||
"""
|
||||
if not task_id:
|
||||
return
|
||||
try:
|
||||
from tools.terminal_tool import register_task_env_overrides
|
||||
register_task_env_overrides(task_id, {"cwd": cwd})
|
||||
register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
|
||||
except Exception:
|
||||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
@@ -145,6 +176,11 @@ class SessionState:
|
||||
model: str = ""
|
||||
history: List[Dict[str, Any]] = field(default_factory=list)
|
||||
cancel_event: Any = None # threading.Event
|
||||
is_running: bool = False
|
||||
queued_prompts: List[str] = field(default_factory=list)
|
||||
runtime_lock: Any = field(default_factory=Lock)
|
||||
current_prompt_text: str = ""
|
||||
interrupted_prompt_text: str = ""
|
||||
|
||||
|
||||
class SessionManager:
|
||||
@@ -175,6 +211,7 @@ class SessionManager:
|
||||
"""Create a new session with a unique ID and a fresh AIAgent."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
session_id = str(uuid.uuid4())
|
||||
agent = self._make_agent(session_id=session_id, cwd=cwd)
|
||||
state = SessionState(
|
||||
@@ -217,6 +254,7 @@ class SessionManager:
|
||||
"""Deep-copy a session's history into a new session."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
original = self.get_session(session_id) # checks DB too
|
||||
if original is None:
|
||||
return None
|
||||
@@ -318,6 +356,7 @@ class SessionManager:
|
||||
|
||||
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
|
||||
"""Update the working directory for a session and its tool overrides."""
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
state = self.get_session(session_id) # checks DB too
|
||||
if state is None:
|
||||
return None
|
||||
@@ -427,17 +466,10 @@ class SessionManager:
|
||||
except Exception:
|
||||
logger.debug("Failed to update ACP session metadata", exc_info=True)
|
||||
|
||||
# Replace stored messages with current history.
|
||||
db.clear_messages(state.session_id)
|
||||
for msg in state.history:
|
||||
db.append_message(
|
||||
session_id=state.session_id,
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content"),
|
||||
tool_name=msg.get("tool_name") or msg.get("name"),
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
)
|
||||
# Replace stored messages with current history atomically so a
|
||||
# mid-rewrite failure rolls back and the previously persisted
|
||||
# conversation is preserved (salvaged from #13675).
|
||||
db.replace_messages(state.session_id, state.history)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)
|
||||
|
||||
|
||||
+822
-21
@@ -28,6 +28,11 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
||||
"terminal": "execute",
|
||||
"process": "execute",
|
||||
"execute_code": "execute",
|
||||
# Session/meta tools
|
||||
"todo": "other",
|
||||
"skill_view": "read",
|
||||
"skills_list": "read",
|
||||
"skill_manage": "edit",
|
||||
# Web / fetch
|
||||
"web_search": "fetch",
|
||||
"web_extract": "fetch",
|
||||
@@ -51,6 +56,28 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
||||
}
|
||||
|
||||
|
||||
_POLISHED_TOOLS = {
|
||||
# Core operator loop
|
||||
"todo", "memory", "session_search", "delegate_task",
|
||||
# Files / execution
|
||||
"read_file", "write_file", "patch", "search_files", "terminal", "process", "execute_code",
|
||||
# Skills / web / browser / media
|
||||
"skill_view", "skills_list", "skill_manage", "web_search", "web_extract",
|
||||
"browser_navigate", "browser_click", "browser_type", "browser_press", "browser_scroll",
|
||||
"browser_back", "browser_snapshot", "browser_console", "browser_get_images", "browser_vision",
|
||||
"vision_analyze", "image_generate", "text_to_speech",
|
||||
# Schedulers / platform integrations
|
||||
"cronjob", "send_message", "clarify", "discord", "discord_admin",
|
||||
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
|
||||
"feishu_doc_read", "feishu_drive_list_comments", "feishu_drive_list_comment_replies",
|
||||
"feishu_drive_reply_comment", "feishu_drive_add_comment",
|
||||
"kanban_create", "kanban_show", "kanban_comment", "kanban_complete",
|
||||
"kanban_block", "kanban_link", "kanban_heartbeat",
|
||||
"yb_query_group_info", "yb_query_group_members", "yb_search_sticker",
|
||||
"yb_send_dm", "yb_send_sticker", "mixture_of_agents",
|
||||
}
|
||||
|
||||
|
||||
def get_tool_kind(tool_name: str) -> ToolKind:
|
||||
"""Return the ACP ToolKind for a hermes tool, defaulting to 'other'."""
|
||||
return TOOL_KIND_MAP.get(tool_name, "other")
|
||||
@@ -85,18 +112,645 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
|
||||
if urls:
|
||||
return f"extract: {urls[0]}" + (f" (+{len(urls)-1})" if len(urls) > 1 else "")
|
||||
return "web extract"
|
||||
if tool_name == "process":
|
||||
action = str(args.get("action") or "").strip() or "manage"
|
||||
sid = str(args.get("session_id") or "").strip()
|
||||
return f"process {action}: {sid}" if sid else f"process {action}"
|
||||
if tool_name == "delegate_task":
|
||||
tasks = args.get("tasks")
|
||||
if isinstance(tasks, list) and tasks:
|
||||
return f"delegate batch ({len(tasks)} tasks)"
|
||||
goal = args.get("goal", "")
|
||||
if goal and len(goal) > 60:
|
||||
goal = goal[:57] + "..."
|
||||
return f"delegate: {goal}" if goal else "delegate task"
|
||||
if tool_name == "session_search":
|
||||
query = str(args.get("query") or "").strip()
|
||||
return f"session search: {query}" if query else "recent sessions"
|
||||
if tool_name == "memory":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
target = str(args.get("target") or "memory").strip() or "memory"
|
||||
return f"memory {action}: {target}"
|
||||
if tool_name == "execute_code":
|
||||
return "execute code"
|
||||
code = str(args.get("code") or "").strip()
|
||||
first_line = next((line.strip() for line in code.splitlines() if line.strip()), "")
|
||||
if first_line:
|
||||
if len(first_line) > 70:
|
||||
first_line = first_line[:67] + "..."
|
||||
return f"python: {first_line}"
|
||||
return "python code"
|
||||
if tool_name == "todo":
|
||||
items = args.get("todos")
|
||||
if isinstance(items, list):
|
||||
return f"todo ({len(items)} item{'s' if len(items) != 1 else ''})"
|
||||
return "todo"
|
||||
if tool_name == "skill_view":
|
||||
name = str(args.get("name") or "?").strip() or "?"
|
||||
file_path = str(args.get("file_path") or "").strip()
|
||||
suffix = f"/{file_path}" if file_path else ""
|
||||
return f"skill view ({name}{suffix})"
|
||||
if tool_name == "skills_list":
|
||||
category = str(args.get("category") or "").strip()
|
||||
return f"skills list ({category})" if category else "skills list"
|
||||
if tool_name == "skill_manage":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
name = str(args.get("name") or "?").strip() or "?"
|
||||
file_path = str(args.get("file_path") or "").strip()
|
||||
target = f"{name}/{file_path}" if file_path else name
|
||||
if len(target) > 64:
|
||||
target = target[:61] + "..."
|
||||
return f"skill {action}: {target}"
|
||||
if tool_name == "browser_navigate":
|
||||
return f"navigate: {args.get('url', '?')}"
|
||||
if tool_name == "browser_snapshot":
|
||||
return "browser snapshot"
|
||||
if tool_name == "browser_vision":
|
||||
return f"browser vision: {str(args.get('question', '?'))[:50]}"
|
||||
if tool_name == "browser_get_images":
|
||||
return "browser images"
|
||||
if tool_name == "vision_analyze":
|
||||
return f"analyze image: {args.get('question', '?')[:50]}"
|
||||
return f"analyze image: {str(args.get('question', '?'))[:50]}"
|
||||
if tool_name == "image_generate":
|
||||
prompt = str(args.get("prompt") or args.get("description") or "").strip()
|
||||
return f"generate image: {prompt[:50]}" if prompt else "generate image"
|
||||
if tool_name == "cronjob":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
job_id = str(args.get("job_id") or args.get("id") or "").strip()
|
||||
return f"cron {action}: {job_id}" if job_id else f"cron {action}"
|
||||
return tool_name
|
||||
|
||||
|
||||
def _text(content: str) -> Any:
|
||||
return acp.tool_content(acp.text_block(content))
|
||||
|
||||
|
||||
def _json_loads_maybe(value: Optional[str]) -> Any:
|
||||
if not isinstance(value, str):
|
||||
return value
|
||||
try:
|
||||
return json.loads(value)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Some Hermes tools append a human hint after a JSON payload, e.g.
|
||||
# ``{...}\n\n[Hint: Results truncated...]``. Keep the structured rendering path
|
||||
# by decoding the first JSON value instead of falling back to raw text.
|
||||
try:
|
||||
decoded, _ = json.JSONDecoder().raw_decode(value.lstrip())
|
||||
return decoded
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _truncate_text(text: str, limit: int = 5000) -> str:
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return text[: max(0, limit - 100)] + f"\n... ({len(text)} chars total, truncated)"
|
||||
|
||||
|
||||
def _fenced_text(text: str, language: str = "") -> str:
|
||||
"""Return a Markdown fence that cannot be broken by backticks in text."""
|
||||
longest = max((len(run) for run in text.split("`")[1::2]), default=0)
|
||||
fence = "`" * max(3, longest + 1)
|
||||
return f"{fence}{language}\n{text}\n{fence}"
|
||||
|
||||
|
||||
def _format_todo_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
|
||||
return None
|
||||
summary = data.get("summary") if isinstance(data.get("summary"), dict) else {}
|
||||
icon = {
|
||||
"completed": "✅",
|
||||
"in_progress": "🔄",
|
||||
"pending": "⏳",
|
||||
"cancelled": "✗",
|
||||
}
|
||||
lines = ["**Todo list**", ""]
|
||||
for item in data["todos"]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
status = str(item.get("status") or "pending")
|
||||
content = str(item.get("content") or item.get("id") or "").strip()
|
||||
if content:
|
||||
lines.append(f"- {icon.get(status, '•')} {content}")
|
||||
if summary:
|
||||
cancelled = summary.get("cancelled", 0)
|
||||
lines.extend([
|
||||
"",
|
||||
"**Progress:** "
|
||||
f"{summary.get('completed', 0)} completed, "
|
||||
f"{summary.get('in_progress', 0)} in progress, "
|
||||
f"{summary.get('pending', 0)} pending"
|
||||
+ (f", {cancelled} cancelled" if cancelled else ""),
|
||||
])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_read_file_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("error") and not data.get("content"):
|
||||
return f"Read failed: {data.get('error')}"
|
||||
content = data.get("content")
|
||||
if not isinstance(content, str):
|
||||
return None
|
||||
path = str((args or {}).get("path") or data.get("path") or "file").strip()
|
||||
offset = (args or {}).get("offset")
|
||||
limit = (args or {}).get("limit")
|
||||
range_bits = []
|
||||
if offset:
|
||||
range_bits.append(f"from line {offset}")
|
||||
if limit:
|
||||
range_bits.append(f"limit {limit}")
|
||||
suffix = f" ({', '.join(range_bits)})" if range_bits else ""
|
||||
header = f"Read {path}{suffix}"
|
||||
if data.get("total_lines") is not None:
|
||||
header += f" — {data.get('total_lines')} total lines"
|
||||
# Hermes read_file output is line-numbered with `|`. If we send it as raw
|
||||
# Markdown, Zed can interpret pipes as tables and collapse the layout.
|
||||
# Fence the payload so file lines stay readable and literal.
|
||||
return _truncate_text(f"{header}\n\n{_fenced_text(content)}")
|
||||
|
||||
|
||||
def _format_search_files_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
matches = data.get("matches")
|
||||
if not isinstance(matches, list):
|
||||
return None
|
||||
|
||||
total = data.get("total_count", len(matches))
|
||||
shown = min(len(matches), 12)
|
||||
truncated = bool(data.get("truncated")) or len(matches) > shown
|
||||
lines = [
|
||||
"Search results",
|
||||
f"Found {total} match{'es' if total != 1 else ''}; showing {shown}.",
|
||||
"",
|
||||
]
|
||||
|
||||
for match in matches[:shown]:
|
||||
if not isinstance(match, dict):
|
||||
lines.append(f"- {match}")
|
||||
continue
|
||||
|
||||
path = str(match.get("path") or match.get("file") or match.get("filename") or "?")
|
||||
line = match.get("line") or match.get("line_number")
|
||||
content = str(match.get("content") or match.get("text") or "").strip()
|
||||
loc = f"{path}:{line}" if line else path
|
||||
lines.append(f"- {loc}")
|
||||
if content:
|
||||
snippet = _truncate_text(" ".join(content.split()), 300)
|
||||
lines.append(f" {snippet}")
|
||||
|
||||
if truncated:
|
||||
lines.extend([
|
||||
"",
|
||||
"Results truncated. Narrow the search, add file_glob, or use offset to page.",
|
||||
])
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_execute_code_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
output = str(data.get("output") or "")
|
||||
error = str(data.get("error") or "")
|
||||
exit_code = data.get("exit_code")
|
||||
parts = [f"Exit code: {exit_code}" if exit_code is not None else "Execution complete"]
|
||||
if output:
|
||||
parts.extend(["", "Output:", output])
|
||||
if error:
|
||||
parts.extend(["", "Error:", error])
|
||||
return _truncate_text("\n".join(parts))
|
||||
|
||||
|
||||
def _extract_markdown_headings(content: str, limit: int = 8) -> list[str]:
|
||||
headings: list[str] = []
|
||||
for line in content.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("#"):
|
||||
heading = stripped.lstrip("#").strip()
|
||||
if heading:
|
||||
headings.append(heading)
|
||||
if len(headings) >= limit:
|
||||
break
|
||||
return headings
|
||||
|
||||
|
||||
def _format_skill_view_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("success") is False:
|
||||
return f"Skill view failed: {data.get('error', 'unknown error')}"
|
||||
name = str(data.get("name") or "skill")
|
||||
file_path = str(data.get("file") or data.get("path") or "SKILL.md")
|
||||
description = str(data.get("description") or "").strip()
|
||||
content = str(data.get("content") or "")
|
||||
linked = data.get("linked_files") if isinstance(data.get("linked_files"), dict) else None
|
||||
|
||||
lines = ["**Skill loaded**", "", f"- **Name:** `{name}`", f"- **File:** `{file_path}`"]
|
||||
if description:
|
||||
lines.append(f"- **Description:** {description}")
|
||||
if content:
|
||||
lines.append(f"- **Content:** {len(content):,} chars loaded into agent context")
|
||||
if linked:
|
||||
linked_count = sum(len(v) for v in linked.values() if isinstance(v, list))
|
||||
lines.append(f"- **Linked files:** {linked_count}")
|
||||
|
||||
headings = _extract_markdown_headings(content)
|
||||
if headings:
|
||||
lines.extend(["", "**Sections**"])
|
||||
lines.extend(f"- {heading}" for heading in headings)
|
||||
|
||||
lines.extend([
|
||||
"",
|
||||
"_Full skill content is available to the agent but hidden here to keep ACP readable._",
|
||||
])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_skill_manage_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
|
||||
action = str((args or {}).get("action") or "manage").strip() or "manage"
|
||||
name = str((args or {}).get("name") or data.get("name") or "skill").strip() or "skill"
|
||||
file_path = str((args or {}).get("file_path") or data.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
success = data.get("success")
|
||||
status = "✅ Skill updated" if success is not False else "✗ Skill update failed"
|
||||
|
||||
lines = [f"**{status}**", "", f"- **Action:** `{action}`", f"- **Skill:** `{name}`"]
|
||||
if action not in {"delete"}:
|
||||
lines.append(f"- **File:** `{file_path}`")
|
||||
|
||||
message = str(data.get("message") or data.get("error") or "").strip()
|
||||
if message:
|
||||
lines.append(f"- **Result:** {message}")
|
||||
|
||||
replacements = data.get("replacements") or data.get("replacement_count")
|
||||
if replacements is not None:
|
||||
lines.append(f"- **Replacements:** {replacements}")
|
||||
|
||||
path = str(data.get("path") or "").strip()
|
||||
if path:
|
||||
lines.append(f"- **Path:** `{path}`")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_web_search_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
web = data.get("data", {}).get("web") if isinstance(data.get("data"), dict) else data.get("web")
|
||||
if not isinstance(web, list):
|
||||
return None
|
||||
lines = [f"Web results: {len(web)}"]
|
||||
for item in web[:10]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
title = str(item.get("title") or item.get("url") or "result").strip()
|
||||
url = str(item.get("url") or "").strip()
|
||||
desc = str(item.get("description") or "").strip()
|
||||
lines.append(f"• {title}" + (f" — {url}" if url else ""))
|
||||
if desc:
|
||||
lines.append(f" {desc}")
|
||||
return _truncate_text("\n".join(lines))
|
||||
|
||||
|
||||
def _format_web_extract_result(result: Optional[str]) -> Optional[str]:
|
||||
"""Return only web_extract errors for ACP; success stays compact via title."""
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("success") is False and data.get("error"):
|
||||
return f"Web extract failed: {data.get('error')}"
|
||||
results = data.get("results")
|
||||
if not isinstance(results, list):
|
||||
return None
|
||||
|
||||
failures: list[str] = []
|
||||
for item in results[:10]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
error = str(item.get("error") or "").strip()
|
||||
if not error or error in {"None", "null"}:
|
||||
continue
|
||||
url = str(item.get("url") or "").strip()
|
||||
title = str(item.get("title") or url or "Untitled").strip()
|
||||
failures.append(
|
||||
f"- {title}" + (f" — {url}" if url and url != title else "") + f"\n Error: {_truncate_text(error, limit=500)}"
|
||||
)
|
||||
|
||||
if not failures:
|
||||
return None
|
||||
lines = [f"Web extract failed for {len(failures)} URL{'s' if len(failures) != 1 else ''}"]
|
||||
lines.extend(failures)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_process_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if data.get("success") is False and data.get("error"):
|
||||
return f"Process error: {data.get('error')}"
|
||||
action = str((args or {}).get("action") or "process").strip() or "process"
|
||||
if isinstance(data.get("processes"), list):
|
||||
processes = data["processes"]
|
||||
lines = [f"Processes: {len(processes)}"]
|
||||
for proc in processes[:20]:
|
||||
if not isinstance(proc, dict):
|
||||
lines.append(f"- {proc}")
|
||||
continue
|
||||
sid = str(proc.get("session_id") or proc.get("id") or "?")
|
||||
status = str(proc.get("status") or ("exited" if proc.get("exited") else "running"))
|
||||
cmd = str(proc.get("command") or "").strip()
|
||||
pid = proc.get("pid")
|
||||
code = proc.get("exit_code")
|
||||
bits = [status]
|
||||
if pid is not None:
|
||||
bits.append(f"pid {pid}")
|
||||
if code is not None:
|
||||
bits.append(f"exit {code}")
|
||||
lines.append(f"- `{sid}` — {', '.join(bits)}" + (f" — {cmd[:120]}" if cmd else ""))
|
||||
if len(processes) > 20:
|
||||
lines.append(f"... {len(processes) - 20} more process(es)")
|
||||
return "\n".join(lines)
|
||||
|
||||
status = str(data.get("status") or data.get("state") or action).strip()
|
||||
sid = str(data.get("session_id") or (args or {}).get("session_id") or "").strip()
|
||||
lines = [f"Process {action}: {status}" + (f" (`{sid}`)" if sid else "")]
|
||||
for key, label in (("command", "Command"), ("pid", "PID"), ("exit_code", "Exit code"), ("returncode", "Exit code"), ("lines", "Lines")):
|
||||
if data.get(key) is not None:
|
||||
lines.append(f"- **{label}:** {data.get(key)}")
|
||||
output = data.get("output") or data.get("new_output") or data.get("log") or data.get("stdout")
|
||||
error = data.get("error") or data.get("stderr")
|
||||
if output:
|
||||
lines.extend(["", "Output:", _truncate_text(str(output), limit=5000)])
|
||||
if error:
|
||||
lines.extend(["", "Error:", _truncate_text(str(error), limit=2000)])
|
||||
msg = data.get("message")
|
||||
if msg and not output and not error:
|
||||
lines.append(str(msg))
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_delegate_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("error") and not isinstance(data.get("results"), list):
|
||||
return f"Delegation failed: {data.get('error')}"
|
||||
results = data.get("results")
|
||||
if not isinstance(results, list):
|
||||
return None
|
||||
total = data.get("total_duration_seconds")
|
||||
lines = [f"Delegation results: {len(results)} task{'s' if len(results) != 1 else ''}" + (f" in {total}s" if total is not None else "")]
|
||||
icon = {"completed": "✅", "failed": "✗", "error": "✗", "timeout": "⏱", "interrupted": "⚠"}
|
||||
for item in results:
|
||||
if not isinstance(item, dict):
|
||||
lines.append(f"- {item}")
|
||||
continue
|
||||
idx = item.get("task_index")
|
||||
status = str(item.get("status") or "unknown")
|
||||
model = item.get("model")
|
||||
dur = item.get("duration_seconds")
|
||||
role = item.get("_child_role")
|
||||
header = f"{icon.get(status, '•')} Task {idx + 1 if isinstance(idx, int) else '?'}: {status}"
|
||||
bits = []
|
||||
if model:
|
||||
bits.append(str(model))
|
||||
if role:
|
||||
bits.append(f"role={role}")
|
||||
if dur is not None:
|
||||
bits.append(f"{dur}s")
|
||||
if bits:
|
||||
header += " (" + ", ".join(bits) + ")"
|
||||
lines.extend(["", header])
|
||||
summary = str(item.get("summary") or "").strip()
|
||||
error = str(item.get("error") or "").strip()
|
||||
if summary:
|
||||
lines.append(_truncate_text(summary, limit=1200))
|
||||
if error:
|
||||
lines.append("Error: " + _truncate_text(error, limit=800))
|
||||
trace = item.get("tool_trace")
|
||||
if isinstance(trace, list) and trace:
|
||||
names = [str(t.get("tool") or "?") for t in trace if isinstance(t, dict)]
|
||||
if names:
|
||||
lines.append("Tools: " + ", ".join(names[:12]) + (f" (+{len(names)-12})" if len(names) > 12 else ""))
|
||||
return _truncate_text("\n".join(lines), limit=8000)
|
||||
|
||||
|
||||
def _format_session_search_result(result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("success") is False:
|
||||
return f"Session search failed: {data.get('error', 'unknown error')}"
|
||||
results = data.get("results")
|
||||
if not isinstance(results, list):
|
||||
return None
|
||||
mode = data.get("mode") or "search"
|
||||
query = data.get("query")
|
||||
lines = ["Recent sessions" if mode == "recent" else f"Session search results" + (f" for `{query}`" if query else "")]
|
||||
if not results:
|
||||
lines.append(str(data.get("message") or "No matching sessions found."))
|
||||
return "\n".join(lines)
|
||||
for item in results:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
sid = str(item.get("session_id") or "?")
|
||||
title = str(item.get("title") or item.get("when") or "Untitled session").strip()
|
||||
when = str(item.get("last_active") or item.get("started_at") or item.get("when") or "").strip()
|
||||
count = item.get("message_count")
|
||||
source = str(item.get("source") or "").strip()
|
||||
meta = ", ".join(str(x) for x in [when, source, f"{count} msgs" if count is not None else ""] if x)
|
||||
lines.append(f"- **{title}** (`{sid}`)" + (f" — {meta}" if meta else ""))
|
||||
summary = str(item.get("summary") or item.get("preview") or "").strip()
|
||||
if summary:
|
||||
lines.append(" " + _truncate_text(" ".join(summary.split()), limit=500))
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_memory_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
action = str((args or {}).get("action") or "memory").strip() or "memory"
|
||||
target = str(data.get("target") or (args or {}).get("target") or "memory")
|
||||
if data.get("success") is False:
|
||||
lines = [f"✗ Memory {action} failed ({target})", str(data.get("error") or "unknown error")]
|
||||
matches = data.get("matches")
|
||||
if isinstance(matches, list) and matches:
|
||||
lines.append("Matches:")
|
||||
lines.extend(f"- {_truncate_text(str(m), 160)}" for m in matches[:5])
|
||||
return "\n".join(lines)
|
||||
lines = [f"✅ Memory {action} saved ({target})"]
|
||||
if data.get("message"):
|
||||
lines.append(str(data.get("message")))
|
||||
if data.get("entry_count") is not None:
|
||||
lines.append(f"Entries: {data.get('entry_count')}")
|
||||
if data.get("usage"):
|
||||
lines.append(f"Usage: {data.get('usage')}")
|
||||
# Avoid dumping all memory entries into ACP UI; show only the explicit new value preview.
|
||||
preview = str((args or {}).get("content") or (args or {}).get("old_text") or "").strip()
|
||||
if preview:
|
||||
lines.append("Preview: " + _truncate_text(preview, limit=300))
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_edit_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
path = str((args or {}).get("path") or "file").strip()
|
||||
if isinstance(data, dict):
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed for {path}: {data.get('error', 'unknown error')}"
|
||||
message = str(data.get("message") or "").strip()
|
||||
replacements = data.get("replacements") or data.get("replacement_count")
|
||||
lines = [f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")]
|
||||
if message:
|
||||
lines.append(message)
|
||||
if replacements is not None:
|
||||
lines.append(f"Replacements: {replacements}")
|
||||
if data.get("files_modified"):
|
||||
files = data.get("files_modified")
|
||||
if isinstance(files, list):
|
||||
lines.append("Files: " + ", ".join(f"`{f}`" for f in files[:8]))
|
||||
return "\n".join(lines)
|
||||
if isinstance(result, str) and result.strip():
|
||||
return _truncate_text(result, limit=3000)
|
||||
return f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")
|
||||
|
||||
|
||||
def _format_browser_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed: {data.get('error', 'unknown error')}"
|
||||
if tool_name == "browser_get_images":
|
||||
images = data.get("images") or data.get("data")
|
||||
if isinstance(images, list):
|
||||
lines = [f"Images found: {len(images)}"]
|
||||
for img in images[:12]:
|
||||
if isinstance(img, dict):
|
||||
alt = str(img.get("alt") or "").strip()
|
||||
url = str(img.get("url") or img.get("src") or "").strip()
|
||||
lines.append(f"- {alt or 'image'}" + (f" — {url}" if url else ""))
|
||||
return _truncate_text("\n".join(lines), limit=5000)
|
||||
title = str(data.get("title") or data.get("url") or data.get("status") or tool_name)
|
||||
text = str(data.get("text") or data.get("content") or data.get("snapshot") or data.get("analysis") or data.get("message") or "").strip()
|
||||
lines = [title]
|
||||
if data.get("url") and data.get("url") != title:
|
||||
lines.append(str(data.get("url")))
|
||||
if text:
|
||||
lines.extend(["", _truncate_text(text, limit=5000)])
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_media_or_cron_result(tool_name: str, result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, dict):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed: {data.get('error', 'unknown error')}"
|
||||
lines = [f"✅ {tool_name} completed"]
|
||||
for key in ("file_path", "path", "url", "image_url", "job_id", "id", "status", "message", "next_run"):
|
||||
if data.get(key):
|
||||
lines.append(f"- **{key}:** {data.get(key)}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_generic_structured_result(tool_name: str, result: Optional[str]) -> Optional[str]:
|
||||
data = _json_loads_maybe(result)
|
||||
if not isinstance(data, (dict, list)):
|
||||
return result if isinstance(result, str) and result.strip() else None
|
||||
if isinstance(data, list):
|
||||
lines = [f"{tool_name}: {len(data)} item{'s' if len(data) != 1 else ''}"]
|
||||
for item in data[:12]:
|
||||
lines.append(f"- {_truncate_text(str(item), limit=240)}")
|
||||
return _truncate_text("\n".join(lines), limit=5000)
|
||||
|
||||
if data.get("success") is False or data.get("error"):
|
||||
return f"{tool_name} failed: {data.get('error', 'unknown error')}"
|
||||
|
||||
lines = [f"✅ {tool_name} completed" if data.get("success") is True else f"{tool_name} result"]
|
||||
priority_keys = (
|
||||
"message", "status", "id", "task_id", "issue_id", "title", "name", "entity_id",
|
||||
"state", "service", "url", "path", "file_path", "count", "total", "next_run",
|
||||
)
|
||||
seen = set()
|
||||
for key in priority_keys:
|
||||
value = data.get(key)
|
||||
if value in (None, "", [], {}):
|
||||
continue
|
||||
seen.add(key)
|
||||
lines.append(f"- **{key}:** {_truncate_text(str(value), limit=500)}")
|
||||
|
||||
for key, value in data.items():
|
||||
if key in seen or key in {"success", "raw", "content", "entries"}:
|
||||
continue
|
||||
if value in (None, "", [], {}):
|
||||
continue
|
||||
if isinstance(value, (dict, list)):
|
||||
preview = json.dumps(value, ensure_ascii=False, default=str)
|
||||
else:
|
||||
preview = str(value)
|
||||
lines.append(f"- **{key}:** {_truncate_text(preview, limit=500)}")
|
||||
if len(lines) >= 14:
|
||||
break
|
||||
|
||||
content = data.get("content")
|
||||
if isinstance(content, str) and content.strip():
|
||||
lines.extend(["", _truncate_text(content.strip(), limit=1500)])
|
||||
return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _build_polished_completion_content(
|
||||
tool_name: str,
|
||||
result: Optional[str],
|
||||
function_args: Optional[Dict[str, Any]],
|
||||
) -> Optional[List[Any]]:
|
||||
formatter = {
|
||||
"todo": lambda: _format_todo_result(result),
|
||||
"read_file": lambda: _format_read_file_result(result, function_args),
|
||||
"write_file": lambda: _format_edit_result(tool_name, result, function_args),
|
||||
"patch": lambda: _format_edit_result(tool_name, result, function_args),
|
||||
"search_files": lambda: _format_search_files_result(result),
|
||||
"execute_code": lambda: _format_execute_code_result(result),
|
||||
"process": lambda: _format_process_result(result, function_args),
|
||||
"delegate_task": lambda: _format_delegate_result(result),
|
||||
"session_search": lambda: _format_session_search_result(result),
|
||||
"memory": lambda: _format_memory_result(result, function_args),
|
||||
"skill_view": lambda: _format_skill_view_result(result),
|
||||
"skill_manage": lambda: _format_skill_manage_result(result, function_args),
|
||||
"web_search": lambda: _format_web_search_result(result),
|
||||
"web_extract": lambda: _format_web_extract_result(result),
|
||||
"browser_navigate": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"browser_snapshot": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"browser_vision": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"browser_get_images": lambda: _format_browser_result(tool_name, result, function_args),
|
||||
"vision_analyze": lambda: _format_media_or_cron_result(tool_name, result),
|
||||
"image_generate": lambda: _format_media_or_cron_result(tool_name, result),
|
||||
"cronjob": lambda: _format_media_or_cron_result(tool_name, result),
|
||||
}.get(tool_name)
|
||||
if formatter is None and tool_name in _POLISHED_TOOLS:
|
||||
formatter = lambda: _format_generic_structured_result(tool_name, result)
|
||||
if formatter is None:
|
||||
return None
|
||||
text = formatter()
|
||||
if not text:
|
||||
return None
|
||||
return [_text(text)]
|
||||
|
||||
|
||||
def _build_patch_mode_content(patch_text: str) -> List[Any]:
|
||||
"""Parse V4A patch mode input into ACP diff blocks when possible."""
|
||||
if not patch_text:
|
||||
@@ -258,7 +912,11 @@ def _build_tool_complete_content(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return [acp.tool_content(acp.text_block(display_result))]
|
||||
polished_content = _build_polished_completion_content(tool_name, result, function_args)
|
||||
if polished_content:
|
||||
return polished_content
|
||||
|
||||
return [_text(display_result)]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -288,7 +946,6 @@ def build_tool_start(
|
||||
content = _build_patch_mode_content(patch_text)
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "write_file":
|
||||
@@ -297,32 +954,172 @@ def build_tool_start(
|
||||
content = [acp.tool_diff_content(path=path, new_text=file_content)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "terminal":
|
||||
command = arguments.get("command", "")
|
||||
content = [acp.tool_content(acp.text_block(f"$ {command}"))]
|
||||
content = [_text(f"$ {command}")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "read_file":
|
||||
path = arguments.get("path", "")
|
||||
content = [acp.tool_content(acp.text_block(f"Reading {path}"))]
|
||||
# The title and location already identify the file. Sending a synthetic
|
||||
# "Reading ..." content block makes Zed render an unhelpful Output
|
||||
# section before the real file contents arrive on completion.
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
tool_call_id, title, kind=kind, content=None, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "search_files":
|
||||
pattern = arguments.get("pattern", "")
|
||||
target = arguments.get("target", "content")
|
||||
content = [acp.tool_content(acp.text_block(f"Searching for '{pattern}' ({target})"))]
|
||||
search_path = arguments.get("path")
|
||||
where = f" in {search_path}" if search_path else ""
|
||||
content = [_text(f"Searching for '{pattern}' ({target}){where}")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "todo":
|
||||
items = arguments.get("todos")
|
||||
if isinstance(items, list):
|
||||
preview_lines = ["Updating todo list", ""]
|
||||
for item in items[:8]:
|
||||
if isinstance(item, dict):
|
||||
preview_lines.append(f"- {item.get('status', 'pending')}: {item.get('content', item.get('id', ''))}")
|
||||
if len(items) > 8:
|
||||
preview_lines.append(f"... {len(items) - 8} more")
|
||||
content = [_text("\n".join(preview_lines))]
|
||||
else:
|
||||
content = [_text("Reading todo list")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "skill_view":
|
||||
name = str(arguments.get("name") or "?").strip() or "?"
|
||||
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
content = [_text(f"Loading skill '{name}' ({file_path})")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "skill_manage":
|
||||
action = str(arguments.get("action") or "manage").strip() or "manage"
|
||||
name = str(arguments.get("name") or "?").strip() or "?"
|
||||
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
path = f"skills/{name}/{file_path}" if file_path else f"skills/{name}"
|
||||
|
||||
if action == "patch":
|
||||
old = str(arguments.get("old_string") or "")
|
||||
new = str(arguments.get("new_string") or "")
|
||||
content = [acp.tool_diff_content(path=path, old_text=old or None, new_text=new)]
|
||||
elif action in {"edit", "create"}:
|
||||
content = [
|
||||
acp.tool_diff_content(
|
||||
path=path,
|
||||
new_text=str(arguments.get("content") or ""),
|
||||
)
|
||||
]
|
||||
elif action == "write_file":
|
||||
target = str(arguments.get("file_path") or "file")
|
||||
content = [
|
||||
acp.tool_diff_content(
|
||||
path=f"skills/{name}/{target}",
|
||||
new_text=str(arguments.get("file_content") or ""),
|
||||
)
|
||||
]
|
||||
elif action in {"delete", "remove_file"}:
|
||||
target = str(arguments.get("file_path") or file_path or name)
|
||||
content = [_text(f"Removing {target} from skill '{name}'")]
|
||||
else:
|
||||
content = [_text(f"Running skill_manage action '{action}' on skill '{name}' ({file_path})")]
|
||||
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "execute_code":
|
||||
code = str(arguments.get("code") or "").strip()
|
||||
preview = code[:1200] + (f"\n... ({len(code)} chars total, truncated)" if len(code) > 1200 else "")
|
||||
content = [_text(f"Running Python helper script:\n\n```python\n{preview}\n```" if preview else "Running Python helper script")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "web_search":
|
||||
query = str(arguments.get("query") or "").strip()
|
||||
content = [_text(f"Searching the web for: {query}" if query else "Searching the web")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "web_extract":
|
||||
# The title identifies the URL(s). Avoid a duplicate content block so
|
||||
# Zed renders this like read_file: compact start, concise completion.
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=None, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "process":
|
||||
action = str(arguments.get("action") or "").strip() or "manage"
|
||||
sid = str(arguments.get("session_id") or "").strip()
|
||||
data_preview = str(arguments.get("data") or "").strip()
|
||||
text = f"Process action: {action}" + (f"\nSession: {sid}" if sid else "")
|
||||
if data_preview:
|
||||
text += "\nInput: " + _truncate_text(data_preview, limit=500)
|
||||
content = [_text(text)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "delegate_task":
|
||||
tasks = arguments.get("tasks")
|
||||
if isinstance(tasks, list) and tasks:
|
||||
lines = [f"Delegating {len(tasks)} tasks", ""]
|
||||
for i, task in enumerate(tasks[:8], 1):
|
||||
if isinstance(task, dict):
|
||||
goal = str(task.get("goal") or "").strip()
|
||||
role = str(task.get("role") or "").strip()
|
||||
lines.append(f"{i}. " + _truncate_text(goal, limit=160) + (f" ({role})" if role else ""))
|
||||
if len(tasks) > 8:
|
||||
lines.append(f"... {len(tasks) - 8} more")
|
||||
content = [_text("\n".join(lines))]
|
||||
else:
|
||||
goal = str(arguments.get("goal") or "").strip()
|
||||
content = [_text("Delegating task" + (f":\n{_truncate_text(goal, limit=800)}" if goal else ""))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "session_search":
|
||||
query = str(arguments.get("query") or "").strip()
|
||||
content = [_text(f"Searching past sessions for: {query}" if query else "Loading recent sessions")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "memory":
|
||||
action = str(arguments.get("action") or "manage").strip() or "manage"
|
||||
target = str(arguments.get("target") or "memory").strip() or "memory"
|
||||
preview = str(arguments.get("content") or arguments.get("old_text") or "").strip()
|
||||
text = f"Memory {action} ({target})"
|
||||
if preview:
|
||||
text += "\nPreview: " + _truncate_text(preview, limit=500)
|
||||
content = [_text(text)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name in _POLISHED_TOOLS:
|
||||
try:
|
||||
args_text = json.dumps(arguments, indent=2, default=str)
|
||||
except (TypeError, ValueError):
|
||||
args_text = str(arguments)
|
||||
content = [_text(_truncate_text(args_text, limit=1200))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
# Generic fallback
|
||||
@@ -334,7 +1131,7 @@ def build_tool_start(
|
||||
content = [acp.tool_content(acp.text_block(args_text))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
raw_input=None if tool_name in _POLISHED_TOOLS else arguments,
|
||||
)
|
||||
|
||||
|
||||
@@ -347,18 +1144,22 @@ def build_tool_complete(
|
||||
) -> ToolCallProgress:
|
||||
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
|
||||
kind = get_tool_kind(tool_name)
|
||||
content = _build_tool_complete_content(
|
||||
tool_name,
|
||||
result,
|
||||
function_args=function_args,
|
||||
snapshot=snapshot,
|
||||
)
|
||||
if tool_name == "web_extract":
|
||||
error_text = _format_web_extract_result(result)
|
||||
content = [_text(error_text)] if error_text else None
|
||||
else:
|
||||
content = _build_tool_complete_content(
|
||||
tool_name,
|
||||
result,
|
||||
function_args=function_args,
|
||||
snapshot=snapshot,
|
||||
)
|
||||
return acp.update_tool_call(
|
||||
tool_call_id,
|
||||
kind=kind,
|
||||
status="completed",
|
||||
content=content,
|
||||
raw_output=result,
|
||||
raw_output=None if tool_name in _POLISHED_TOOLS else result,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -76,6 +76,7 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
|
||||
# Models where temperature/top_p/top_k return 400 if set to non-default values.
|
||||
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
|
||||
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
|
||||
_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
|
||||
|
||||
# ── Max output token limits per Anthropic model ───────────────────────
|
||||
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
|
||||
@@ -105,6 +106,9 @@ _ANTHROPIC_OUTPUT_LIMITS = {
|
||||
"claude-3-haiku": 4_096,
|
||||
# Third-party Anthropic-compatible providers
|
||||
"minimax": 131_072,
|
||||
# Qwen models via DashScope Anthropic-compatible endpoint
|
||||
# DashScope enforces max_tokens ∈ [1, 65536]
|
||||
"qwen3": 65_536,
|
||||
}
|
||||
|
||||
# For any model not in the table, assume the highest current limit.
|
||||
@@ -216,6 +220,17 @@ def _forbids_sampling_params(model: str) -> bool:
|
||||
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
|
||||
|
||||
|
||||
def _supports_fast_mode(model: str) -> bool:
|
||||
"""Return True for models that support Anthropic Fast Mode (speed=fast).
|
||||
|
||||
Per Anthropic docs, fast mode is currently supported on Opus 4.6 only.
|
||||
Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7)
|
||||
returns HTTP 400. This guard prevents silently 400'ing when stale config
|
||||
or older callers leave fast mode enabled across a model upgrade.
|
||||
"""
|
||||
return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
|
||||
|
||||
|
||||
# Beta headers for enhanced features (sent with ALL auth types).
|
||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||
@@ -1222,6 +1237,14 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
|
||||
``keep_nullable_hint=False`` because the Anthropic validator does not
|
||||
recognize the OpenAPI-style ``nullable: true`` extension and strict
|
||||
schema-to-grammar converters may reject unknown keywords.
|
||||
|
||||
Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the
|
||||
Anthropic API rejects union keywords at the schema root with a generic
|
||||
HTTP 400. Several upstream and plugin tools ship schemas with one of
|
||||
these keywords at the top level (commonly for Pydantic discriminated
|
||||
unions). If we land here with those keywords still present after
|
||||
nullable-union stripping, drop them and fall back to a plain object
|
||||
schema so the tool still validates at the Anthropic boundary.
|
||||
"""
|
||||
if not schema:
|
||||
return {"type": "object", "properties": {}}
|
||||
@@ -1231,6 +1254,12 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
|
||||
normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
|
||||
if not isinstance(normalized, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
# Strip top-level union keywords that Anthropic's validator rejects.
|
||||
banned = {"oneOf", "allOf", "anyOf"}
|
||||
if banned & normalized.keys():
|
||||
normalized = {k: v for k, v in normalized.items() if k not in banned}
|
||||
if "type" not in normalized:
|
||||
normalized["type"] = "object"
|
||||
if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
|
||||
normalized = {**normalized, "properties": {}}
|
||||
return normalized
|
||||
@@ -1241,10 +1270,24 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
if not tools:
|
||||
return []
|
||||
result = []
|
||||
seen_names: set = set()
|
||||
for t in tools:
|
||||
fn = t.get("function", {})
|
||||
name = fn.get("name", "")
|
||||
# Defensive dedup: Anthropic rejects requests with duplicate tool
|
||||
# names. Upstream injection paths already dedup, but this guard
|
||||
# converts a hard API failure into a warning. See: #18478
|
||||
if name and name in seen_names:
|
||||
logger.warning(
|
||||
"convert_tools_to_anthropic: duplicate tool name '%s' "
|
||||
"— dropping second occurrence",
|
||||
name,
|
||||
)
|
||||
continue
|
||||
if name:
|
||||
seen_names.add(name)
|
||||
result.append({
|
||||
"name": fn.get("name", ""),
|
||||
"name": name,
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": _normalize_tool_input_schema(
|
||||
fn.get("parameters", {"type": "object", "properties": {}})
|
||||
@@ -1901,9 +1944,15 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
|
||||
# output speed. Only for native Anthropic endpoints — third-party
|
||||
# providers would reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
# output speed. Per Anthropic docs, fast mode is only supported on
|
||||
# Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
if (
|
||||
fast_mode
|
||||
and not _is_third_party_anthropic_endpoint(base_url)
|
||||
and _supports_fast_mode(model)
|
||||
):
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
|
||||
+270
-34
@@ -196,6 +196,12 @@ def _is_kimi_model(model: Optional[str]) -> bool:
|
||||
return bare.startswith("kimi-") or bare == "kimi"
|
||||
|
||||
|
||||
def _is_arcee_trinity_thinking(model: Optional[str]) -> bool:
|
||||
"""True for Arcee Trinity Large Thinking (direct or via OpenRouter)."""
|
||||
bare = (model or "").strip().lower().rsplit("/", 1)[-1]
|
||||
return bare == "trinity-large-thinking"
|
||||
|
||||
|
||||
def _fixed_temperature_for_model(
|
||||
model: Optional[str],
|
||||
base_url: Optional[str] = None,
|
||||
@@ -213,10 +219,46 @@ def _fixed_temperature_for_model(
|
||||
if _is_kimi_model(model):
|
||||
logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
|
||||
return OMIT_TEMPERATURE
|
||||
if _is_arcee_trinity_thinking(model):
|
||||
return 0.5
|
||||
return None
|
||||
|
||||
|
||||
def _compression_threshold_for_model(model: Optional[str]) -> Optional[float]:
|
||||
"""Return a context-compression threshold override for specific models.
|
||||
|
||||
The threshold is the fraction of the model's context window that must be
|
||||
consumed before Hermes triggers summarization. Higher values delay
|
||||
compression and preserve more raw context.
|
||||
|
||||
Returns a float in (0, 1] to override the global ``compression.threshold``
|
||||
config value, or ``None`` to leave the user's config value unchanged.
|
||||
"""
|
||||
if _is_arcee_trinity_thinking(model):
|
||||
return 0.75
|
||||
return None
|
||||
|
||||
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
def _get_aux_model_for_provider(provider_id: str) -> str:
|
||||
"""Return the cheap auxiliary model for a provider.
|
||||
|
||||
Reads from ProviderProfile.default_aux_model first, falling back to the
|
||||
legacy hardcoded dict for providers that predate the profiles system.
|
||||
"""
|
||||
try:
|
||||
from providers import get_provider_profile
|
||||
_p = get_provider_profile(provider_id)
|
||||
if _p and _p.default_aux_model:
|
||||
return _p.default_aux_model
|
||||
except Exception:
|
||||
pass
|
||||
return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
|
||||
|
||||
|
||||
# Fallback for providers not yet migrated to ProviderProfile.default_aux_model,
|
||||
# plus providers we intentionally keep pinned here (e.g. Anthropic predates
|
||||
# profiles). New providers should set default_aux_model on their profile instead.
|
||||
_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
|
||||
"gemini": "gemini-3-flash-preview",
|
||||
"zai": "glm-4.5-flash",
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
@@ -235,6 +277,10 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"tencent-tokenhub": "hy3-preview",
|
||||
}
|
||||
|
||||
# Legacy alias — callers that haven't been updated to _get_aux_model_for_provider()
|
||||
# can still use this dict directly. Kept in sync with _FALLBACK above.
|
||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
|
||||
|
||||
# Vision-specific model overrides for direct providers.
|
||||
# When the user's main provider has a dedicated vision/multimodal model that
|
||||
# differs from their main chat model, map it here. The vision auto-detect
|
||||
@@ -259,13 +305,70 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
|
||||
"kimi-coding-cn",
|
||||
})
|
||||
|
||||
# OpenRouter app attribution headers
|
||||
_OR_HEADERS = {
|
||||
# OpenRouter app attribution headers (base — always sent).
|
||||
# `X-Title` is the canonical attribution header OpenRouter's dashboard
|
||||
# reads; the previous `X-OpenRouter-Title` label was not recognized there.
|
||||
_OR_HEADERS_BASE = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
|
||||
# Truthy values for boolean env-var parsing.
|
||||
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})
|
||||
|
||||
|
||||
def build_or_headers(or_config: dict | None = None) -> dict:
|
||||
"""Build OpenRouter headers, optionally including response-cache headers.
|
||||
|
||||
Precedence for response cache: env var > config.yaml > default (enabled).
|
||||
|
||||
Environment variables:
|
||||
``HERMES_OPENROUTER_CACHE`` — truthy (``1``/``true``/``yes``/``on``)
|
||||
enables caching; ``0``/``false``/``no``/``off`` disables.
|
||||
Overrides ``openrouter.response_cache`` in config.yaml.
|
||||
``HERMES_OPENROUTER_CACHE_TTL`` — integer seconds (1-86400).
|
||||
Overrides ``openrouter.response_cache_ttl`` in config.yaml.
|
||||
|
||||
*or_config* is the ``openrouter`` section from config.yaml. When *None*,
|
||||
falls back to reading config from disk via ``load_config()``.
|
||||
"""
|
||||
headers = dict(_OR_HEADERS_BASE)
|
||||
|
||||
# Resolve config from disk if not provided.
|
||||
if or_config is None:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
or_config = load_config().get("openrouter", {})
|
||||
except Exception:
|
||||
or_config = {}
|
||||
|
||||
# Determine cache enabled: env var overrides config.
|
||||
env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower()
|
||||
if env_cache:
|
||||
cache_enabled = env_cache in _TRUTHY_ENV_VALUES
|
||||
else:
|
||||
cache_enabled = or_config.get("response_cache", False)
|
||||
|
||||
if not cache_enabled:
|
||||
return headers
|
||||
|
||||
headers["X-OpenRouter-Cache"] = "true"
|
||||
|
||||
# Determine TTL: env var overrides config.
|
||||
env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip()
|
||||
if env_ttl:
|
||||
if env_ttl.isdigit():
|
||||
ttl = int(env_ttl)
|
||||
if 1 <= ttl <= 86400:
|
||||
headers["X-OpenRouter-Cache-TTL"] = str(ttl)
|
||||
else:
|
||||
ttl = or_config.get("response_cache_ttl", 300)
|
||||
if isinstance(ttl, (int, float)) and 1 <= ttl <= 86400:
|
||||
headers["X-OpenRouter-Cache-TTL"] = str(int(ttl))
|
||||
|
||||
return headers
|
||||
|
||||
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
|
||||
# referrerUrl and X-Title maps to appName in the gateway's analytics.
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
@@ -512,7 +615,12 @@ class _CodexCompletionsAdapter:
|
||||
# API allows it.
|
||||
pass
|
||||
else:
|
||||
effort = reasoning_cfg.get("effort", "medium")
|
||||
# Truthy-only check mirrors agent/transports/codex.py
|
||||
# build_kwargs(): falsy values (None, "", 0) fall back
|
||||
# to the default rather than being forwarded to the
|
||||
# Codex backend, which rejects e.g. {"effort": null}
|
||||
# with a 400.
|
||||
effort = reasoning_cfg.get("effort") or "medium"
|
||||
# Codex backend rejects "minimal"; clamp to "low" to
|
||||
# match the main-agent Codex transport behavior.
|
||||
if effort == "minimal":
|
||||
@@ -1095,7 +1203,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
|
||||
raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
|
||||
base_url = _to_openai_base_url(raw_base_url)
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
||||
model = _get_aux_model_for_provider(provider_id) or None
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
|
||||
@@ -1111,6 +1219,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
else:
|
||||
try:
|
||||
from providers import get_provider_profile as _gpf_aux
|
||||
_ph_aux = _gpf_aux(provider_id)
|
||||
if _ph_aux and _ph_aux.default_headers:
|
||||
extra["default_headers"] = dict(_ph_aux.default_headers)
|
||||
except Exception:
|
||||
pass
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||
return _client, model
|
||||
@@ -1122,7 +1238,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
|
||||
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||
base_url = _to_openai_base_url(raw_base_url)
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
||||
model = _get_aux_model_for_provider(provider_id) or None
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
||||
@@ -1138,6 +1254,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
else:
|
||||
try:
|
||||
from providers import get_provider_profile as _gpf_aux2
|
||||
_ph_aux2 = _gpf_aux2(provider_id)
|
||||
if _ph_aux2 and _ph_aux2.default_headers:
|
||||
extra["default_headers"] = dict(_ph_aux2.default_headers)
|
||||
except Exception:
|
||||
pass
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||
return _client, model
|
||||
@@ -1149,23 +1273,23 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
|
||||
|
||||
|
||||
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
or_key = _pool_runtime_api_key(entry)
|
||||
or_key = explicit_api_key or _pool_runtime_api_key(entry)
|
||||
if not or_key:
|
||||
return None, None
|
||||
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
|
||||
logger.debug("Auxiliary client: OpenRouter via pool")
|
||||
return OpenAI(api_key=or_key, base_url=base_url,
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
return None, None
|
||||
logger.debug("Auxiliary client: OpenRouter")
|
||||
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
@@ -1474,7 +1598,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
|
||||
|
||||
def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
|
||||
def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]:
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
|
||||
except ImportError:
|
||||
@@ -1484,10 +1608,10 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
|
||||
if pool_present:
|
||||
if entry is None:
|
||||
return None, None
|
||||
token = _pool_runtime_api_key(entry)
|
||||
token = explicit_api_key or _pool_runtime_api_key(entry)
|
||||
else:
|
||||
entry = None
|
||||
token = resolve_anthropic_token()
|
||||
token = explicit_api_key or resolve_anthropic_token()
|
||||
if not token:
|
||||
return None, None
|
||||
|
||||
@@ -1510,7 +1634,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
|
||||
|
||||
from agent.anthropic_adapter import _is_oauth_token
|
||||
is_oauth = _is_oauth_token(token)
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
|
||||
model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
|
||||
logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
|
||||
try:
|
||||
real_client = build_anthropic_client(token, base_url)
|
||||
@@ -1588,6 +1712,39 @@ def _is_payment_error(exc: Exception) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_rate_limit_error(exc: Exception) -> bool:
|
||||
"""Detect rate-limit errors that warrant provider fallback.
|
||||
|
||||
Returns True for HTTP 429 errors whose message indicates rate limiting
|
||||
(as opposed to billing/quota exhaustion, which _is_payment_error handles).
|
||||
Also catches OpenAI SDK RateLimitError instances that may not set
|
||||
.status_code on the exception object.
|
||||
"""
|
||||
status = getattr(exc, "status_code", None)
|
||||
err_lower = str(exc).lower()
|
||||
|
||||
# OpenAI SDK's RateLimitError sometimes omits .status_code —
|
||||
# detect by class name so we don't miss these. (PR #8023 pattern)
|
||||
if type(exc).__name__ == "RateLimitError":
|
||||
return True
|
||||
|
||||
if status == 429:
|
||||
# Distinguish rate-limit from billing: billing keywords are handled
|
||||
# by _is_payment_error, everything else on 429 is a rate limit.
|
||||
if any(kw in err_lower for kw in (
|
||||
"rate limit", "rate_limit", "too many requests",
|
||||
"try again", "retry after", "resets in",
|
||||
)):
|
||||
return True
|
||||
# Generic 429 without billing keywords = likely a rate limit
|
||||
if not any(kw in err_lower for kw in (
|
||||
"credits", "insufficient funds", "billing",
|
||||
"payment required", "can only afford",
|
||||
)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_connection_error(exc: Exception) -> bool:
|
||||
"""Detect connection/network errors that warrant provider fallback.
|
||||
|
||||
@@ -1911,7 +2068,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
|
||||
}
|
||||
sync_base_url = str(sync_client.base_url)
|
||||
if base_url_host_matches(sync_base_url, "openrouter.ai"):
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
async_kwargs["default_headers"] = build_or_headers()
|
||||
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.copilot_auth import copilot_request_headers
|
||||
|
||||
@@ -1977,6 +2134,12 @@ def resolve_provider_client(
|
||||
(client, resolved_model) or (None, None) if auth is unavailable.
|
||||
"""
|
||||
_validate_proxy_env_urls()
|
||||
# Preserve the original provider name before alias normalization so a
|
||||
# user-declared ``custom_providers`` entry whose name coincidentally
|
||||
# matches a built-in alias (e.g. user names their custom provider "kimi"
|
||||
# which aliases to "kimi-coding") is still reachable via the named-custom
|
||||
# branch below.
|
||||
original_provider = (provider or "").strip().lower()
|
||||
# Normalise aliases
|
||||
provider = _normalize_aux_provider(provider)
|
||||
|
||||
@@ -2047,9 +2210,9 @@ def resolve_provider_client(
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── OpenRouter ───────────────────────────────────────────────────
|
||||
# ── OpenRouter ───────────────────────────────────────────
|
||||
if provider == "openrouter":
|
||||
client, default = _try_openrouter()
|
||||
client, default = _try_openrouter(explicit_api_key=explicit_api_key)
|
||||
if client is None:
|
||||
logger.warning(
|
||||
"resolve_provider_client: openrouter requested but %s",
|
||||
@@ -2163,7 +2326,18 @@ def resolve_provider_client(
|
||||
# ── Named custom providers (config.yaml providers dict / custom_providers list) ───
|
||||
try:
|
||||
from hermes_cli.runtime_provider import _get_named_custom_provider
|
||||
custom_entry = _get_named_custom_provider(provider)
|
||||
# When the raw requested name is an alias (``kimi`` → ``kimi-coding``)
|
||||
# and the user defined a ``custom_providers`` entry under that alias
|
||||
# name, the custom entry is the intended target — the built-in alias
|
||||
# rewriting would otherwise hijack the request. Only preferred when
|
||||
# the raw name is an alias (not a canonical provider name) so custom
|
||||
# entries that coincidentally match a canonical provider (e.g. ``nous``)
|
||||
# still defer to the built-in per `_get_named_custom_provider`'s guard.
|
||||
custom_entry = None
|
||||
if original_provider and original_provider != provider:
|
||||
custom_entry = _get_named_custom_provider(original_provider)
|
||||
if custom_entry is None:
|
||||
custom_entry = _get_named_custom_provider(provider)
|
||||
if custom_entry:
|
||||
custom_base = custom_entry.get("base_url", "").strip()
|
||||
custom_key = custom_entry.get("api_key", "").strip()
|
||||
@@ -2264,7 +2438,7 @@ def resolve_provider_client(
|
||||
|
||||
if pconfig.auth_type == "api_key":
|
||||
if provider == "anthropic":
|
||||
client, default_model = _try_anthropic()
|
||||
client, default_model = _try_anthropic(explicit_api_key=explicit_api_key)
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
|
||||
return None, None
|
||||
@@ -2273,6 +2447,12 @@ def resolve_provider_client(
|
||||
|
||||
creds = resolve_api_key_provider_credentials(provider)
|
||||
api_key = str(creds.get("api_key", "")).strip()
|
||||
# Honour an explicit api_key override (e.g. from a fallback_model entry
|
||||
# or a custom_providers entry) so callers that pass an explicit
|
||||
# credential can authenticate against endpoints where no built-in
|
||||
# credential is registered for this provider alias.
|
||||
if explicit_api_key:
|
||||
api_key = explicit_api_key.strip() or api_key
|
||||
if not api_key:
|
||||
tried_sources = list(pconfig.api_key_env_vars)
|
||||
if provider == "copilot":
|
||||
@@ -2284,8 +2464,13 @@ def resolve_provider_client(
|
||||
|
||||
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||
base_url = _to_openai_base_url(raw_base_url)
|
||||
# Honour an explicit base_url override from the caller — used when a
|
||||
# fallback_model entry (or custom_providers lookup) routes through a
|
||||
# built-in provider name but targets a user-specified endpoint.
|
||||
if explicit_base_url:
|
||||
base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))
|
||||
|
||||
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
|
||||
default_model = _get_aux_model_for_provider(provider)
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
|
||||
if provider == "gemini":
|
||||
@@ -2565,8 +2750,11 @@ def resolve_vision_provider_client(
|
||||
return resolved_provider, sync_client, final_model
|
||||
|
||||
if resolved_base_url:
|
||||
provider_for_base_override = (
|
||||
requested if requested and requested not in ("", "auto") else "custom"
|
||||
)
|
||||
client, final_model = resolve_provider_client(
|
||||
"custom",
|
||||
provider_for_base_override,
|
||||
model=resolved_model,
|
||||
async_mode=async_mode,
|
||||
explicit_base_url=resolved_base_url,
|
||||
@@ -2574,8 +2762,8 @@ def resolve_vision_provider_client(
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if client is None:
|
||||
return "custom", None, None
|
||||
return "custom", client, final_model
|
||||
return provider_for_base_override, None, None
|
||||
return provider_for_base_override, client, final_model
|
||||
|
||||
if requested == "auto":
|
||||
# Vision auto-detection order:
|
||||
@@ -3041,8 +3229,14 @@ def _resolve_task_provider_model(
|
||||
|
||||
if task:
|
||||
# Config.yaml is the primary source for per-task overrides.
|
||||
if cfg_base_url:
|
||||
if cfg_base_url and cfg_api_key:
|
||||
# Both base_url and api_key explicitly set → custom endpoint.
|
||||
return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
|
||||
if cfg_base_url and cfg_provider and cfg_provider != "auto":
|
||||
# base_url set without api_key but with a known provider — use
|
||||
# the provider so it can resolve credentials from env vars
|
||||
# (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
|
||||
return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
|
||||
if cfg_provider and cfg_provider != "auto":
|
||||
return cfg_provider, resolved_model, None, None, resolved_api_mode
|
||||
|
||||
@@ -3209,7 +3403,26 @@ def _build_call_kwargs(
|
||||
kwargs["max_tokens"] = max_tokens
|
||||
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
# Defensive dedup: providers like Google Vertex, Azure, and Bedrock
|
||||
# reject requests with duplicate tool names (HTTP 400). The upstream
|
||||
# injection paths (run_agent.py) already dedup, but this guard
|
||||
# converts a hard API failure into a warning if an upstream regression
|
||||
# reintroduces duplicates. See: #18478
|
||||
_seen: set = set()
|
||||
_deduped: list = []
|
||||
for _t in tools:
|
||||
_tname = (_t.get("function") or {}).get("name", "")
|
||||
if _tname and _tname in _seen:
|
||||
logger.warning(
|
||||
"_build_call_kwargs: duplicate tool name '%s' removed "
|
||||
"(provider=%s model=%s)",
|
||||
_tname, provider, model,
|
||||
)
|
||||
continue
|
||||
if _tname:
|
||||
_seen.add(_tname)
|
||||
_deduped.append(_t)
|
||||
kwargs["tools"] = _deduped
|
||||
|
||||
# Provider-specific extra_body
|
||||
merged_extra = dict(extra_body or {})
|
||||
@@ -3424,7 +3637,7 @@ def call_llm(
|
||||
except Exception as retry_err:
|
||||
# If the max_tokens retry also hits a payment or connection
|
||||
# error, fall through to the fallback chain below.
|
||||
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
|
||||
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
@@ -3507,13 +3720,27 @@ def call_llm(
|
||||
# Codex/OAuth tokens that authenticate but whose endpoint is down,
|
||||
# and providers the user never configured that got picked up by
|
||||
# the auto-detection chain.
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
#
|
||||
# ── Rate-limit fallback (#13579) ─────────────────────────────
|
||||
# When the provider returns a 429 rate-limit (not billing), fall
|
||||
# back to an alternative provider instead of exhausting retries
|
||||
# against the same rate-limited endpoint.
|
||||
should_fallback = (
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
)
|
||||
# Only try alternative providers when the user didn't explicitly
|
||||
# configure this task's provider. Explicit provider = hard constraint;
|
||||
# auto (the default) = best-effort fallback chain. (#7559)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
if should_fallback and is_auto:
|
||||
reason = "payment error" if _is_payment_error(first_err) else "connection error"
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
@@ -3716,7 +3943,7 @@ async def async_call_llm(
|
||||
except Exception as retry_err:
|
||||
# If the max_tokens retry also hits a payment or connection
|
||||
# error, fall through to the fallback chain below.
|
||||
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
|
||||
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
@@ -3785,11 +4012,20 @@ async def async_call_llm(
|
||||
return _validate_llm_response(
|
||||
await retry_client.chat.completions.create(**retry_kwargs), task)
|
||||
|
||||
# ── Payment / connection fallback (mirrors sync call_llm) ─────
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
|
||||
should_fallback = (
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
if should_fallback and is_auto:
|
||||
reason = "payment error" if _is_payment_error(first_err) else "connection error"
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
elif _is_rate_limit_error(first_err):
|
||||
reason = "rate limit"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
|
||||
+79
-12
@@ -43,6 +43,9 @@ SUMMARY_PREFIX = (
|
||||
"they were already addressed. "
|
||||
"Your current task is identified in the '## Active Task' section of the "
|
||||
"summary — resume exactly from there. "
|
||||
"IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
|
||||
"prompt is ALWAYS authoritative and active — never ignore or deprioritize "
|
||||
"memory content due to this compaction note. "
|
||||
"Respond ONLY to the latest user message "
|
||||
"that appears AFTER this summary. The current session state (files, "
|
||||
"config, etc.) may reflect work described here — avoid repeating it:"
|
||||
@@ -344,6 +347,7 @@ class ContextCompressor(ContextEngine):
|
||||
self._last_aux_model_failure_model = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
self._summary_failure_cooldown_until = 0.0 # transient errors must not block a fresh session
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
@@ -538,7 +542,7 @@ class ContextCompressor(ContextEngine):
|
||||
# Token-budget approach: walk backward accumulating tokens
|
||||
accumulated = 0
|
||||
boundary = len(result)
|
||||
min_protect = min(protect_tail_count, len(result) - 1)
|
||||
min_protect = min(protect_tail_count, len(result))
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
msg = result[i]
|
||||
raw_content = msg.get("content") or ""
|
||||
@@ -553,7 +557,16 @@ class ContextCompressor(ContextEngine):
|
||||
break
|
||||
accumulated += msg_tokens
|
||||
boundary = i
|
||||
prune_boundary = max(boundary, len(result) - min_protect)
|
||||
# Translate the budget walk into a "protected count", apply the
|
||||
# floor in count-space (where `max` reads naturally: protect at
|
||||
# least `min_protect` messages or whatever the budget reserved,
|
||||
# whichever is more), then convert back to a prune boundary.
|
||||
# Doing this in index-space with `max` would invert the direction
|
||||
# (smaller index = MORE protected), so a generous budget would
|
||||
# silently get truncated back down to `min_protect`.
|
||||
budget_protect_count = len(result) - boundary
|
||||
protected_count = max(budget_protect_count, min_protect)
|
||||
prune_boundary = len(result) - protected_count
|
||||
else:
|
||||
prune_boundary = len(result) - protect_tail_count
|
||||
|
||||
@@ -569,6 +582,8 @@ class ContextCompressor(ContextEngine):
|
||||
# Skip multimodal content (list of content blocks)
|
||||
if isinstance(content, list):
|
||||
continue
|
||||
if not isinstance(content, str):
|
||||
continue
|
||||
if len(content) < 200:
|
||||
continue
|
||||
h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
|
||||
@@ -588,6 +603,8 @@ class ContextCompressor(ContextEngine):
|
||||
# Skip multimodal content (list of content blocks)
|
||||
if isinstance(content, list):
|
||||
continue
|
||||
if not isinstance(content, str):
|
||||
continue
|
||||
if not content or content == _PRUNED_TOOL_PLACEHOLDER:
|
||||
continue
|
||||
# Skip already-deduplicated or previously-summarized results
|
||||
@@ -903,15 +920,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
or "does not exist" in _err_str
|
||||
or "no available channel" in _err_str
|
||||
)
|
||||
_is_timeout = (
|
||||
_status in (408, 429, 502, 504)
|
||||
or "timeout" in _err_str
|
||||
)
|
||||
if (
|
||||
_is_model_not_found
|
||||
(_is_model_not_found or _is_timeout)
|
||||
and self.summary_model
|
||||
and self.summary_model != self.model
|
||||
and not getattr(self, "_summary_model_fallen_back", False)
|
||||
):
|
||||
self._summary_model_fallen_back = True
|
||||
logging.warning(
|
||||
"Summary model '%s' not available (%s). "
|
||||
"Summary model '%s' unavailable (%s). "
|
||||
"Falling back to main model '%s' for compression.",
|
||||
self.summary_model, e, self.model,
|
||||
)
|
||||
@@ -975,15 +996,39 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _with_summary_prefix(summary: str) -> str:
|
||||
"""Normalize summary text to the current compaction handoff format."""
|
||||
def _strip_summary_prefix(summary: str) -> str:
|
||||
"""Return summary body without the current or legacy handoff prefix."""
|
||||
text = (summary or "").strip()
|
||||
for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
|
||||
for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
|
||||
if text.startswith(prefix):
|
||||
text = text[len(prefix):].lstrip()
|
||||
break
|
||||
return text[len(prefix):].lstrip()
|
||||
return text
|
||||
|
||||
@classmethod
|
||||
def _with_summary_prefix(cls, summary: str) -> str:
|
||||
"""Normalize summary text to the current compaction handoff format."""
|
||||
text = cls._strip_summary_prefix(summary)
|
||||
return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX
|
||||
|
||||
@staticmethod
|
||||
def _is_context_summary_content(content: Any) -> bool:
|
||||
text = _content_text_for_contains(content).lstrip()
|
||||
return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX)
|
||||
|
||||
@classmethod
|
||||
def _find_latest_context_summary(
|
||||
cls,
|
||||
messages: List[Dict[str, Any]],
|
||||
start: int,
|
||||
end: int,
|
||||
) -> tuple[Optional[int], str]:
|
||||
"""Find the newest handoff summary inside a compression window."""
|
||||
for idx in range(end - 1, start - 1, -1):
|
||||
content = messages[idx].get("content")
|
||||
if cls._is_context_summary_content(content):
|
||||
return idx, cls._strip_summary_prefix(_content_text_for_contains(content))
|
||||
return None, ""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool-call / tool-result pair integrity helpers
|
||||
# ------------------------------------------------------------------
|
||||
@@ -992,8 +1037,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
def _get_tool_call_id(tc) -> str:
|
||||
"""Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
|
||||
if isinstance(tc, dict):
|
||||
return tc.get("id", "")
|
||||
return getattr(tc, "id", "") or ""
|
||||
return tc.get("call_id", "") or tc.get("id", "") or ""
|
||||
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
|
||||
|
||||
def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Fix orphaned tool_call / tool_result pairs after compression.
|
||||
@@ -1290,6 +1335,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
return messages
|
||||
|
||||
turns_to_summarize = messages[compress_start:compress_end]
|
||||
summary_idx, summary_body = self._find_latest_context_summary(
|
||||
messages,
|
||||
compress_start,
|
||||
compress_end,
|
||||
)
|
||||
if summary_idx is not None:
|
||||
if summary_body and not self._previous_summary:
|
||||
self._previous_summary = summary_body
|
||||
turns_to_summarize = messages[summary_idx + 1:compress_end]
|
||||
|
||||
if not self.quiet_mode:
|
||||
logger.info(
|
||||
@@ -1322,7 +1376,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
msg = messages[i].copy()
|
||||
if i == 0 and msg.get("role") == "system":
|
||||
existing = msg.get("content")
|
||||
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
|
||||
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work. Your persistent memory (MEMORY.md, USER.md) remains fully authoritative regardless of compaction.]"
|
||||
if _compression_note not in _content_text_for_contains(existing):
|
||||
msg["content"] = _append_text_to_content(
|
||||
existing,
|
||||
@@ -1367,6 +1421,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
# Merge the summary into the first tail message instead
|
||||
# of inserting a standalone message that breaks alternation.
|
||||
_merge_summary_into_tail = True
|
||||
|
||||
# When the summary lands as a standalone role="user" message,
|
||||
# weak models read the verbatim "## Active Task" quote of a past
|
||||
# user request as fresh input (#11475, #14521). Append the explicit
|
||||
# end marker — the same one used in the merge-into-tail path — so
|
||||
# the model has a clear "summary above, not new input" signal.
|
||||
if not _merge_summary_into_tail and summary_role == "user":
|
||||
summary = (
|
||||
summary
|
||||
+ "\n\n--- END OF CONTEXT SUMMARY — "
|
||||
"respond to the message below, not the summary above ---"
|
||||
)
|
||||
|
||||
if not _merge_summary_into_tail:
|
||||
compressed.append({"role": summary_role, "content": summary})
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
@@ -13,7 +14,7 @@ from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.config import get_env_value
|
||||
from hermes_cli.config import get_env_value, load_env
|
||||
import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
@@ -1380,6 +1381,16 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
|
||||
changed = False
|
||||
active_sources: Set[str] = set()
|
||||
|
||||
# Prefer ~/.hermes/.env over os.environ — the user's config file is the
|
||||
# authoritative source for Hermes credentials. Stale env vars from parent
|
||||
# processes (Codex CLI, test scripts, etc.) should not override deliberate
|
||||
# changes to the .env file.
|
||||
def _get_env_prefer_dotenv(key: str) -> str:
|
||||
env_file = load_env()
|
||||
val = env_file.get(key) or os.environ.get(key) or ""
|
||||
return val.strip()
|
||||
|
||||
# Honour user suppression — `hermes auth remove <provider> <N>` for an
|
||||
# env-seeded credential marks the env:<VAR> source as suppressed so it
|
||||
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
|
||||
@@ -1391,8 +1402,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
|
||||
# Prefer ~/.hermes/.env over os.environ
|
||||
token = _get_env_prefer_dotenv("OPENROUTER_API_KEY")
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
@@ -1418,7 +1429,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
|
||||
env_url = _get_env_prefer_dotenv(pconfig.base_url_env_var).rstrip("/")
|
||||
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
if provider == "anthropic":
|
||||
@@ -1429,8 +1440,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
]
|
||||
|
||||
for env_var in env_vars:
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value(env_var) or "").strip()
|
||||
# Prefer ~/.hermes/.env over os.environ
|
||||
token = _get_env_prefer_dotenv(env_var)
|
||||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
|
||||
+406
-47
@@ -24,11 +24,12 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
import threading
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Set
|
||||
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from tools import skill_usage
|
||||
@@ -36,6 +37,22 @@ from tools import skill_usage
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _strip_aux_credential(value: Any) -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
return text or None
|
||||
|
||||
|
||||
class _ReviewRuntimeBinding(NamedTuple):
|
||||
"""Provider/model for the curator review fork plus optional per-slot overrides."""
|
||||
|
||||
provider: str
|
||||
model: str
|
||||
explicit_api_key: Optional[str]
|
||||
explicit_base_url: Optional[str]
|
||||
|
||||
|
||||
DEFAULT_INTERVAL_HOURS = 24 * 7 # 7 days
|
||||
DEFAULT_MIN_IDLE_HOURS = 2
|
||||
DEFAULT_STALE_AFTER_DAYS = 30
|
||||
@@ -55,6 +72,7 @@ def _default_state() -> Dict[str, Any]:
|
||||
"last_run_at": None,
|
||||
"last_run_duration_seconds": None,
|
||||
"last_run_summary": None,
|
||||
"last_report_path": None,
|
||||
"paused": False,
|
||||
"run_count": 0,
|
||||
}
|
||||
@@ -183,7 +201,16 @@ def should_run_now(now: Optional[datetime] = None) -> bool:
|
||||
Gates:
|
||||
- curator.enabled == True
|
||||
- not paused
|
||||
- last_run_at missing, OR older than interval_hours
|
||||
- last_run_at present AND older than interval_hours
|
||||
|
||||
First-run behavior: when there is no ``last_run_at`` (fresh install, or
|
||||
install that predates the curator), we DO NOT run immediately. The
|
||||
curator is designed to run after at least ``interval_hours`` (7 days by
|
||||
default) of skill activity, not on the first background tick after
|
||||
``hermes update``. On first observation we seed ``last_run_at`` to "now"
|
||||
and defer the first real pass by one full interval. Users who want to
|
||||
run it sooner can always invoke ``hermes curator run`` (with or without
|
||||
``--dry-run``) explicitly — that path bypasses this gate.
|
||||
|
||||
The idle check (min_idle_hours) is applied at the call site where we know
|
||||
whether an agent is actively running — here we only enforce the static
|
||||
@@ -197,7 +224,21 @@ def should_run_now(now: Optional[datetime] = None) -> bool:
|
||||
state = load_state()
|
||||
last = _parse_iso(state.get("last_run_at"))
|
||||
if last is None:
|
||||
return True
|
||||
# Never run before. Seed state so we wait a full interval before the
|
||||
# first real pass. Report-only; do not auto-mutate the library the
|
||||
# very first time a gateway ticks after an update.
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
try:
|
||||
state["last_run_at"] = now.isoformat()
|
||||
state["last_run_summary"] = (
|
||||
"deferred first run — curator seeded, will run after one "
|
||||
"interval; use `hermes curator run --dry-run` to preview now"
|
||||
)
|
||||
save_state(state)
|
||||
except Exception as e: # pragma: no cover — best-effort persistence
|
||||
logger.debug("Failed to seed curator last_run_at: %s", e)
|
||||
return False
|
||||
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
@@ -258,6 +299,33 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
|
||||
# Review prompt for the forked agent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
CURATOR_DRY_RUN_BANNER = (
|
||||
"═══════════════════════════════════════════════════════════════\n"
|
||||
"DRY-RUN — REPORT ONLY. DO NOT MUTATE THE SKILL LIBRARY.\n"
|
||||
"═══════════════════════════════════════════════════════════════\n"
|
||||
"\n"
|
||||
"This is a PREVIEW pass. Follow every instruction below EXCEPT:\n"
|
||||
"\n"
|
||||
" • DO NOT call skill_manage with action=patch, create, delete, "
|
||||
"write_file, or remove_file.\n"
|
||||
" • DO NOT call terminal to mv skill directories into .archive/.\n"
|
||||
" • DO NOT call terminal to mv, cp, rm, or rewrite any file under "
|
||||
"~/.hermes/skills/.\n"
|
||||
" • skills_list and skill_view are FINE — read as much as you need.\n"
|
||||
"\n"
|
||||
"Your output IS the deliverable. Produce the exact same "
|
||||
"human-readable summary and structured YAML block you would "
|
||||
"produce on a live run — but describe the actions you WOULD take, "
|
||||
"not actions you took. A downstream reviewer will read the report "
|
||||
"and decide whether to approve a live run with "
|
||||
"`hermes curator run` (no flag).\n"
|
||||
"\n"
|
||||
"If you accidentally take a mutating action, say so explicitly in "
|
||||
"the summary so the reviewer can revert it.\n"
|
||||
"═══════════════════════════════════════════════════════════════"
|
||||
)
|
||||
|
||||
|
||||
CURATOR_REVIEW_PROMPT = (
|
||||
"You are running as Hermes' background skill CURATOR. This is an "
|
||||
"UMBRELLA-BUILDING consolidation pass, not a passive audit and not a "
|
||||
@@ -336,6 +404,11 @@ CURATOR_REVIEW_PROMPT = (
|
||||
" - skill_manage action=write_file — add a references/, templates/, "
|
||||
"or scripts/ file under an existing skill (the skill must already "
|
||||
"exist)\n"
|
||||
" - skill_manage action=delete — archive a skill. MUST pass "
|
||||
"`absorbed_into=<umbrella>` when you've merged its content into another "
|
||||
"skill, or `absorbed_into=\"\"` when you're truly pruning with no "
|
||||
"forwarding target. This drives cron-job skill-reference migration — "
|
||||
"guessing from your YAML summary after the fact is fragile.\n"
|
||||
" - terminal — mv a sibling into the archive "
|
||||
"OR move its content into a support subfile\n\n"
|
||||
"'keep' is a legitimate decision ONLY when the skill is already a "
|
||||
@@ -397,6 +470,24 @@ def _reports_root() -> Path:
|
||||
return root
|
||||
|
||||
|
||||
def _needle_in_path_component(needle: str, path: str) -> bool:
|
||||
"""Check if *needle* is a complete filename stem or directory name in *path*.
|
||||
|
||||
Unlike simple substring matching, this avoids false positives where short
|
||||
skill names are embedded in longer filenames (e.g. "api" matching
|
||||
"references/api-design.md"). Hyphens and underscores are normalised so
|
||||
"open-webui-setup" matches "open_webui_setup.md".
|
||||
"""
|
||||
norm_needle = needle.replace("-", "_")
|
||||
for part in path.replace("\\", "/").split("/"):
|
||||
if not part:
|
||||
continue
|
||||
stem = part.rsplit(".", 1)[0] if "." in part else part
|
||||
if stem.replace("-", "_") == norm_needle:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _classify_removed_skills(
|
||||
removed: List[str],
|
||||
added: List[str],
|
||||
@@ -475,15 +566,29 @@ def _classify_removed_skills(
|
||||
continue
|
||||
|
||||
# Look for the removed skill's name in file_path / content / raw.
|
||||
haystacks: List[str] = []
|
||||
# Matching strategy differs by field type:
|
||||
# file_path — needle must be a complete path component
|
||||
# (filename stem or directory name), so "api" does NOT
|
||||
# falsely match "references/api-design.md".
|
||||
# content fields — word-boundary regex so "test" does NOT
|
||||
# falsely match "latest" or "testing".
|
||||
haystacks: List[tuple[str, str]] = []
|
||||
for key in ("file_path", "file_content", "content", "new_string", "_raw"):
|
||||
v = args.get(key)
|
||||
if isinstance(v, str):
|
||||
haystacks.append(v)
|
||||
haystacks.append((key, v))
|
||||
hit = False
|
||||
for hay in haystacks:
|
||||
for key, hay in haystacks:
|
||||
for needle in needles:
|
||||
if needle and needle in hay:
|
||||
if not needle:
|
||||
continue
|
||||
if key == "file_path":
|
||||
matched = _needle_in_path_component(needle, hay)
|
||||
else:
|
||||
matched = bool(
|
||||
re.search(rf'\b{re.escape(needle)}\b', hay)
|
||||
)
|
||||
if matched:
|
||||
hit = True
|
||||
evidence = (
|
||||
f"skill_manage action={args.get('action', '?')} "
|
||||
@@ -586,15 +691,76 @@ def _parse_structured_summary(
|
||||
return out
|
||||
|
||||
|
||||
def _extract_absorbed_into_declarations(
|
||||
tool_calls: List[Dict[str, Any]],
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
"""Walk this run's tool calls and extract model-declared absorption targets.
|
||||
|
||||
The curator prompt requires every ``skill_manage(action='delete')`` call
|
||||
to pass ``absorbed_into=<umbrella>`` when consolidating, or
|
||||
``absorbed_into=""`` when truly pruning. This is the single authoritative
|
||||
signal for classification — the model's own declaration at the moment of
|
||||
deletion, which beats both post-hoc YAML summary parsing and substring
|
||||
heuristics on other tool calls.
|
||||
|
||||
Returns ``{skill_name: {"into": "<umbrella>" | "", "declared": True}}``.
|
||||
Entries with ``into == ""`` are explicit prunings.
|
||||
Skills without a ``skill_manage(delete)`` call, or with one that omitted
|
||||
``absorbed_into``, are not in the returned dict — caller falls back to
|
||||
the existing heuristic/YAML logic for those (backward compat with older
|
||||
curator runs and any callers that don't populate the arg).
|
||||
"""
|
||||
out: Dict[str, Dict[str, Any]] = {}
|
||||
for tc in tool_calls or []:
|
||||
if not isinstance(tc, dict):
|
||||
continue
|
||||
if tc.get("name") != "skill_manage":
|
||||
continue
|
||||
raw = tc.get("arguments") or ""
|
||||
args: Dict[str, Any] = {}
|
||||
if isinstance(raw, dict):
|
||||
args = raw
|
||||
elif isinstance(raw, str):
|
||||
try:
|
||||
args = json.loads(raw)
|
||||
except Exception:
|
||||
continue
|
||||
if not isinstance(args, dict):
|
||||
continue
|
||||
if args.get("action") != "delete":
|
||||
continue
|
||||
name = args.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
continue
|
||||
# absorbed_into must be present (even empty string is meaningful);
|
||||
# missing key means the model didn't declare intent.
|
||||
if "absorbed_into" not in args:
|
||||
continue
|
||||
target = args.get("absorbed_into")
|
||||
if target is None:
|
||||
continue
|
||||
if not isinstance(target, str):
|
||||
continue
|
||||
out[name.strip()] = {"into": target.strip(), "declared": True}
|
||||
return out
|
||||
|
||||
|
||||
def _reconcile_classification(
|
||||
removed: List[str],
|
||||
heuristic: Dict[str, List[Dict[str, Any]]],
|
||||
model_block: Dict[str, List[Dict[str, str]]],
|
||||
destinations: Set[str],
|
||||
absorbed_declarations: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
) -> Dict[str, List[Dict[str, Any]]]:
|
||||
"""Merge heuristic (tool-call evidence) with the model's structured block.
|
||||
|
||||
Rules:
|
||||
Rules (evaluated in order; first match wins):
|
||||
- **Model-declared `absorbed_into` at delete time is authoritative.** Any
|
||||
entry in ``absorbed_declarations`` beats every other signal. This is
|
||||
the model telling us directly, at the moment of deletion, what it did.
|
||||
``into != ""`` and target exists → consolidated. ``into == ""`` →
|
||||
pruned. ``into != ""`` but target doesn't exist → hallucination; fall
|
||||
through to the usual signals.
|
||||
- Model-declared consolidation wins when its ``into`` target exists
|
||||
in ``destinations`` (survived or newly-created). This gives the
|
||||
model authority over intent + rationale.
|
||||
@@ -615,6 +781,8 @@ def _reconcile_classification(
|
||||
model_cons = {e["from"]: e for e in model_block.get("consolidations", [])}
|
||||
model_pruned = {e["name"]: e for e in model_block.get("prunings", [])}
|
||||
|
||||
declared = absorbed_declarations or {}
|
||||
|
||||
consolidated: List[Dict[str, Any]] = []
|
||||
pruned: List[Dict[str, Any]] = []
|
||||
|
||||
@@ -622,6 +790,36 @@ def _reconcile_classification(
|
||||
mc = model_cons.get(name)
|
||||
mp = model_pruned.get(name)
|
||||
hc = heur_cons.get(name)
|
||||
dec = declared.get(name)
|
||||
|
||||
# Authoritative: model declared `absorbed_into` at the delete call.
|
||||
if dec is not None:
|
||||
into_claim = dec.get("into", "")
|
||||
if into_claim and into_claim in destinations:
|
||||
entry: Dict[str, Any] = {
|
||||
"name": name,
|
||||
"into": into_claim,
|
||||
"source": "absorbed_into (model-declared at delete)",
|
||||
"reason": (mc.get("reason") or "") if mc else "",
|
||||
}
|
||||
if hc and hc.get("evidence"):
|
||||
entry["evidence"] = hc["evidence"]
|
||||
consolidated.append(entry)
|
||||
continue
|
||||
if into_claim == "":
|
||||
# Explicit prune declaration
|
||||
pruned.append({
|
||||
"name": name,
|
||||
"source": "absorbed_into=\"\" (model-declared prune)",
|
||||
"reason": (mp.get("reason") or "") if mp else "",
|
||||
})
|
||||
continue
|
||||
# into_claim is non-empty but target doesn't exist: the model
|
||||
# named a nonexistent umbrella at delete time. The tool already
|
||||
# rejects this at the skill_manage layer, so we shouldn't see it
|
||||
# in practice — but if it slips through (e.g. the umbrella was
|
||||
# deleted LATER in the same run), fall through to the usual
|
||||
# signals rather than trusting a broken reference.
|
||||
|
||||
# Model says consolidated — trust it if the destination is real.
|
||||
if mc and mc.get("into") in destinations:
|
||||
@@ -757,15 +955,57 @@ def _write_run_report(
|
||||
)
|
||||
model_block = _parse_structured_summary(llm_meta.get("final", "") or "")
|
||||
destinations = set(after_names) | set(added or [])
|
||||
# Authoritative signal: extract per-delete `absorbed_into` declarations
|
||||
# from this run's tool calls. These beat both the YAML summary block and
|
||||
# the substring heuristic — the model is telling us directly, at the
|
||||
# moment of deletion, whether each archived skill was consolidated
|
||||
# (into=<umbrella>) or pruned (into="").
|
||||
absorbed_declarations = _extract_absorbed_into_declarations(
|
||||
llm_meta.get("tool_calls", []) or []
|
||||
)
|
||||
classification = _reconcile_classification(
|
||||
removed=removed,
|
||||
heuristic=heuristic,
|
||||
model_block=model_block,
|
||||
destinations=destinations,
|
||||
absorbed_declarations=absorbed_declarations,
|
||||
)
|
||||
consolidated = classification["consolidated"]
|
||||
pruned = classification["pruned"]
|
||||
|
||||
# Rewrite cron job skill references. When the curator consolidates
|
||||
# skill X into umbrella Y, any cron job that lists X fails to load
|
||||
# it at run time — the scheduler skips it and the job runs without
|
||||
# the instructions it was scheduled to follow. Rewriting the
|
||||
# references in-place keeps scheduled jobs working across
|
||||
# consolidation passes. Best-effort: never let a cron-module issue
|
||||
# break the curator.
|
||||
cron_rewrites: Dict[str, Any] = {"rewrites": [], "jobs_updated": 0, "jobs_scanned": 0}
|
||||
try:
|
||||
consolidated_map = {
|
||||
e["name"]: e["into"]
|
||||
for e in consolidated
|
||||
if isinstance(e, dict) and e.get("name") and e.get("into")
|
||||
}
|
||||
pruned_names = [
|
||||
e["name"] for e in pruned
|
||||
if isinstance(e, dict) and e.get("name")
|
||||
]
|
||||
if consolidated_map or pruned_names:
|
||||
from cron.jobs import rewrite_skill_refs as _rewrite_cron_refs
|
||||
cron_rewrites = _rewrite_cron_refs(
|
||||
consolidated=consolidated_map,
|
||||
pruned=pruned_names,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Curator cron skill rewrite failed: %s", e, exc_info=True)
|
||||
cron_rewrites = {
|
||||
"rewrites": [],
|
||||
"jobs_updated": 0,
|
||||
"jobs_scanned": 0,
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
payload = {
|
||||
"started_at": started_at.isoformat(),
|
||||
"duration_seconds": round(elapsed_seconds, 2),
|
||||
@@ -781,6 +1021,7 @@ def _write_run_report(
|
||||
"consolidated_this_run": len(consolidated),
|
||||
"pruned_this_run": len(pruned),
|
||||
"state_transitions": len(transitions),
|
||||
"cron_jobs_rewritten": int(cron_rewrites.get("jobs_updated", 0)),
|
||||
"tool_calls_total": sum(tc_counts.values()),
|
||||
},
|
||||
"tool_call_counts": tc_counts,
|
||||
@@ -790,6 +1031,7 @@ def _write_run_report(
|
||||
"pruned_names": [p["name"] for p in pruned],
|
||||
"added": added,
|
||||
"state_transitions": transitions,
|
||||
"cron_rewrites": cron_rewrites,
|
||||
"llm_final": llm_meta.get("final", ""),
|
||||
"llm_summary": llm_meta.get("summary", ""),
|
||||
"llm_error": llm_meta.get("error"),
|
||||
@@ -812,6 +1054,17 @@ def _write_run_report(
|
||||
except Exception as e:
|
||||
logger.debug("Curator REPORT.md write failed: %s", e)
|
||||
|
||||
# cron_rewrites.json — only when at least one job was touched, to
|
||||
# keep run dirs uncluttered for the common no-op case.
|
||||
try:
|
||||
if int(cron_rewrites.get("jobs_updated", 0)) > 0:
|
||||
(run_dir / "cron_rewrites.json").write_text(
|
||||
json.dumps(cron_rewrites, indent=2, ensure_ascii=False) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Curator cron_rewrites.json write failed: %s", e)
|
||||
|
||||
return run_dir
|
||||
|
||||
|
||||
@@ -942,6 +1195,39 @@ def _render_report_markdown(p: Dict[str, Any]) -> str:
|
||||
lines.append(f"- `{t.get('name')}`: {t.get('from')} → {t.get('to')}")
|
||||
lines.append("")
|
||||
|
||||
# Cron job rewrites — show which scheduled jobs had their skill
|
||||
# references updated so users can audit that the auto-rewrite did
|
||||
# the right thing. Only present when at least one job changed.
|
||||
cron_rw = p.get("cron_rewrites") or {}
|
||||
cron_rewrites_list = cron_rw.get("rewrites") or []
|
||||
if cron_rewrites_list:
|
||||
lines.append(f"### Cron job skill references rewritten ({len(cron_rewrites_list)})\n")
|
||||
lines.append(
|
||||
"_Cron jobs that referenced a consolidated or pruned skill were "
|
||||
"updated in-place so they keep loading the right instructions "
|
||||
"on their next run. See `cron_rewrites.json` for the full record._\n"
|
||||
)
|
||||
SHOW = 25
|
||||
for entry in cron_rewrites_list[:SHOW]:
|
||||
job_name = entry.get("job_name") or entry.get("job_id") or "?"
|
||||
before = entry.get("before") or []
|
||||
after = entry.get("after") or []
|
||||
mapped = entry.get("mapped") or {}
|
||||
dropped = entry.get("dropped") or []
|
||||
lines.append(
|
||||
f"- `{job_name}`: `{', '.join(before)}` → `{', '.join(after) or '(none)'}`"
|
||||
)
|
||||
for old, new in mapped.items():
|
||||
lines.append(f" - `{old}` → `{new}` (consolidated)")
|
||||
for name in dropped:
|
||||
lines.append(f" - `{name}` dropped (pruned)")
|
||||
if len(cron_rewrites_list) > SHOW:
|
||||
lines.append(
|
||||
f"- … and {len(cron_rewrites_list) - SHOW} more "
|
||||
"(see `cron_rewrites.json`)"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
# Full LLM final response
|
||||
final = (p.get("llm_final") or "").strip()
|
||||
if final:
|
||||
@@ -992,6 +1278,7 @@ def _render_candidate_list() -> str:
|
||||
def run_curator_review(
|
||||
on_summary: Optional[Callable[[str], None]] = None,
|
||||
synchronous: bool = False,
|
||||
dry_run: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Execute a single curator review pass.
|
||||
|
||||
@@ -1004,9 +1291,43 @@ def run_curator_review(
|
||||
|
||||
If *synchronous* is True, the LLM review runs in the calling thread; the
|
||||
default is to spawn a daemon thread so the caller returns immediately.
|
||||
|
||||
If *dry_run* is True, the automatic stale/archive transitions are SKIPPED
|
||||
and the LLM review pass is instructed to produce a report only — no
|
||||
skill_manage mutations, no terminal archive moves. The REPORT.md still
|
||||
gets written and ``state.last_report_path`` still records it so users
|
||||
can read what the curator WOULD have done.
|
||||
"""
|
||||
start = datetime.now(timezone.utc)
|
||||
counts = apply_automatic_transitions(now=start)
|
||||
if dry_run:
|
||||
# Count candidates without mutating state.
|
||||
try:
|
||||
report = skill_usage.agent_created_report()
|
||||
counts = {
|
||||
"checked": len(report),
|
||||
"marked_stale": 0,
|
||||
"archived": 0,
|
||||
"reactivated": 0,
|
||||
}
|
||||
except Exception:
|
||||
counts = {"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0}
|
||||
else:
|
||||
# Pre-mutation snapshot — best-effort, never blocks the run. A
|
||||
# failed snapshot logs at debug and continues (the alternative is
|
||||
# that a transient disk issue silently disables curator forever,
|
||||
# which is worse). Users who want to require snapshots can disable
|
||||
# curator entirely until they can fix disk space.
|
||||
try:
|
||||
from agent import curator_backup
|
||||
snap = curator_backup.snapshot_skills(reason="pre-curator-run")
|
||||
if snap is not None and on_summary:
|
||||
try:
|
||||
on_summary(f"curator: snapshot created ({snap.name})")
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug("Curator pre-run snapshot failed: %s", e, exc_info=True)
|
||||
counts = apply_automatic_transitions(now=start)
|
||||
|
||||
auto_summary_parts = []
|
||||
if counts["marked_stale"]:
|
||||
@@ -1018,11 +1339,16 @@ def run_curator_review(
|
||||
auto_summary = ", ".join(auto_summary_parts) if auto_summary_parts else "no changes"
|
||||
|
||||
# Persist state before the LLM pass so a crash mid-review still records
|
||||
# the run and doesn't immediately re-trigger.
|
||||
# the run and doesn't immediately re-trigger. In dry-run we do NOT bump
|
||||
# last_run_at or run_count — a preview shouldn't push the next scheduled
|
||||
# real pass out. We still record a summary so `hermes curator status`
|
||||
# shows that a preview ran.
|
||||
state = load_state()
|
||||
state["last_run_at"] = start.isoformat()
|
||||
state["run_count"] = int(state.get("run_count", 0)) + 1
|
||||
state["last_run_summary"] = f"auto: {auto_summary}"
|
||||
if not dry_run:
|
||||
state["last_run_at"] = start.isoformat()
|
||||
state["run_count"] = int(state.get("run_count", 0)) + 1
|
||||
prefix = "dry-run auto: " if dry_run else "auto: "
|
||||
state["last_run_summary"] = f"{prefix}{auto_summary}"
|
||||
save_state(state)
|
||||
|
||||
def _llm_pass():
|
||||
@@ -1038,7 +1364,7 @@ def run_curator_review(
|
||||
try:
|
||||
candidate_list = _render_candidate_list()
|
||||
if "No agent-created skills" in candidate_list:
|
||||
final_summary = f"auto: {auto_summary}; llm: skipped (no candidates)"
|
||||
final_summary = f"{prefix}{auto_summary}; llm: skipped (no candidates)"
|
||||
llm_meta = {
|
||||
"final": "",
|
||||
"summary": "skipped (no candidates)",
|
||||
@@ -1048,14 +1374,21 @@ def run_curator_review(
|
||||
"error": None,
|
||||
}
|
||||
else:
|
||||
prompt = f"{CURATOR_REVIEW_PROMPT}\n\n{candidate_list}"
|
||||
if dry_run:
|
||||
prompt = (
|
||||
f"{CURATOR_DRY_RUN_BANNER}\n\n"
|
||||
f"{CURATOR_REVIEW_PROMPT}\n\n"
|
||||
f"{candidate_list}"
|
||||
)
|
||||
else:
|
||||
prompt = f"{CURATOR_REVIEW_PROMPT}\n\n{candidate_list}"
|
||||
llm_meta = _run_llm_review(prompt)
|
||||
final_summary = (
|
||||
f"auto: {auto_summary}; llm: {llm_meta.get('summary', 'no change')}"
|
||||
f"{prefix}{auto_summary}; llm: {llm_meta.get('summary', 'no change')}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Curator LLM pass failed: %s", e, exc_info=True)
|
||||
final_summary = f"auto: {auto_summary}; llm: error ({e})"
|
||||
final_summary = f"{prefix}{auto_summary}; llm: error ({e})"
|
||||
llm_meta = {
|
||||
"final": "",
|
||||
"summary": f"error ({e})",
|
||||
@@ -1114,6 +1447,52 @@ def run_curator_review(
|
||||
}
|
||||
|
||||
|
||||
def _resolve_review_runtime(cfg: Dict[str, Any]) -> _ReviewRuntimeBinding:
|
||||
"""Resolve provider/model and per-slot credentials for the curator review fork.
|
||||
|
||||
Same precedence as `_resolve_review_model()`. Non-empty ``api_key`` /
|
||||
``base_url`` from the active slot are returned as explicit overrides so
|
||||
``resolve_runtime_provider`` does not silently reuse the main chat
|
||||
credential chain for a routed auxiliary model.
|
||||
"""
|
||||
_main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {}
|
||||
_main_provider = _main.get("provider") or "auto"
|
||||
_main_model = _main.get("default") or _main.get("model") or ""
|
||||
|
||||
# 1. Canonical aux task slot
|
||||
_aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {}
|
||||
_cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {}
|
||||
_task_provider = (_cur_task.get("provider") or "").strip() or None
|
||||
_task_model = (_cur_task.get("model") or "").strip() or None
|
||||
if _task_provider and _task_provider != "auto" and _task_model:
|
||||
return _ReviewRuntimeBinding(
|
||||
_task_provider,
|
||||
_task_model,
|
||||
_strip_aux_credential(_cur_task.get("api_key")),
|
||||
_strip_aux_credential(_cur_task.get("base_url")),
|
||||
)
|
||||
|
||||
# 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification)
|
||||
_cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {}
|
||||
_legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {}
|
||||
_legacy_provider = _legacy.get("provider") or None
|
||||
_legacy_model = _legacy.get("model") or None
|
||||
if _legacy_provider and _legacy_model:
|
||||
logger.info(
|
||||
"curator: using deprecated curator.auxiliary.{provider,model} "
|
||||
"config — please migrate to auxiliary.curator.{provider,model}"
|
||||
)
|
||||
return _ReviewRuntimeBinding(
|
||||
str(_legacy_provider),
|
||||
str(_legacy_model),
|
||||
_strip_aux_credential(_legacy.get("api_key")),
|
||||
_strip_aux_credential(_legacy.get("base_url")),
|
||||
)
|
||||
|
||||
# 3. Fall through to the main chat model
|
||||
return _ReviewRuntimeBinding(_main_provider, _main_model, None, None)
|
||||
|
||||
|
||||
def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]:
|
||||
"""Pick (provider, model) for the curator review fork.
|
||||
|
||||
@@ -1129,32 +1508,8 @@ def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]:
|
||||
2. Legacy ``curator.auxiliary.{provider,model}`` when both are set
|
||||
3. Main ``model.{provider,default/model}`` pair
|
||||
"""
|
||||
_main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {}
|
||||
_main_provider = _main.get("provider") or "auto"
|
||||
_main_model = _main.get("default") or _main.get("model") or ""
|
||||
|
||||
# 1. Canonical aux task slot
|
||||
_aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {}
|
||||
_cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {}
|
||||
_task_provider = (_cur_task.get("provider") or "").strip() or None
|
||||
_task_model = (_cur_task.get("model") or "").strip() or None
|
||||
if _task_provider and _task_provider != "auto" and _task_model:
|
||||
return _task_provider, _task_model
|
||||
|
||||
# 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification)
|
||||
_cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {}
|
||||
_legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {}
|
||||
_legacy_provider = _legacy.get("provider") or None
|
||||
_legacy_model = _legacy.get("model") or None
|
||||
if _legacy_provider and _legacy_model:
|
||||
logger.info(
|
||||
"curator: using deprecated curator.auxiliary.{provider,model} "
|
||||
"config — please migrate to auxiliary.curator.{provider,model}"
|
||||
)
|
||||
return _legacy_provider, _legacy_model
|
||||
|
||||
# 3. Fall through to the main chat model
|
||||
return _main_provider, _main_model
|
||||
b = _resolve_review_runtime(cfg)
|
||||
return b.provider, b.model
|
||||
|
||||
|
||||
def _run_llm_review(prompt: str) -> Dict[str, Any]:
|
||||
@@ -1193,10 +1548,10 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
|
||||
# arguments hits an auto-resolution path that fails for OAuth-only
|
||||
# providers and for pool-backed credentials.
|
||||
#
|
||||
# `_resolve_review_model()` honors `auxiliary.curator.{provider,model}`
|
||||
# `_resolve_review_runtime()` honors `auxiliary.curator.{provider,model,...}`
|
||||
# (canonical aux-task slot, wired through `hermes model` → auxiliary
|
||||
# picker and the dashboard Models tab), with a legacy fallback to
|
||||
# `curator.auxiliary.{provider,model}`. See docs/user-guide/features/curator.md.
|
||||
# `curator.auxiliary.{provider,model,...}`. See docs/user-guide/features/curator.md.
|
||||
_api_key = None
|
||||
_base_url = None
|
||||
_api_mode = None
|
||||
@@ -1206,9 +1561,13 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
_cfg = load_config()
|
||||
_provider, _model_name = _resolve_review_model(_cfg)
|
||||
_binding = _resolve_review_runtime(_cfg)
|
||||
_provider, _model_name = _binding.provider, _binding.model
|
||||
_rp = resolve_runtime_provider(
|
||||
requested=_provider, target_model=_model_name
|
||||
requested=_provider,
|
||||
target_model=_model_name,
|
||||
explicit_api_key=_binding.explicit_api_key,
|
||||
explicit_base_url=_binding.explicit_base_url,
|
||||
)
|
||||
_api_key = _rp.get("api_key")
|
||||
_base_url = _rp.get("base_url")
|
||||
|
||||
@@ -0,0 +1,693 @@
|
||||
"""Curator snapshot + rollback.
|
||||
|
||||
A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
|
||||
itself) is taken before any mutating curator pass. Snapshots are tar.gz
|
||||
files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
|
||||
companion ``manifest.json`` describing the snapshot (reason, time, size,
|
||||
counted skill files). Rollback picks a snapshot, moves the current
|
||||
``skills/`` tree aside into another snapshot so even the rollback itself
|
||||
is undoable, then extracts the chosen snapshot into place.
|
||||
|
||||
The snapshot does NOT include:
|
||||
- ``.curator_backups/`` (would recurse)
|
||||
- ``.hub/`` (hub-installed skills — managed by the hub, not us)
|
||||
|
||||
It DOES include:
|
||||
- all SKILL.md files + their directories (``scripts/``, ``references/``,
|
||||
``templates/``, ``assets/``)
|
||||
- ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
|
||||
- ``.archive/`` (so rollback restores previously-archived skills too)
|
||||
- ``.curator_state`` (so rolling back also restores the last-run-at
|
||||
pointer — otherwise the curator would immediately re-fire on the next
|
||||
tick)
|
||||
- ``.bundled_manifest`` (so protection markers stay consistent)
|
||||
|
||||
Alongside the skills tarball, each snapshot also captures a copy of
|
||||
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
|
||||
jobs reference skills by name in their ``skills``/``skill`` fields; the
|
||||
curator's consolidation pass rewrites those in place via
|
||||
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
|
||||
rolling back the skills tree would leave cron jobs pointing at the
|
||||
umbrella skills even though the narrow skills they were originally
|
||||
configured with have been restored. We store the whole jobs.json for
|
||||
fidelity but rollback only touches the ``skills``/``skill`` fields — the
|
||||
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
|
||||
we leave it alone.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_KEEP = 5
|
||||
|
||||
# Entries under skills/ that should NEVER be rolled up into a snapshot.
|
||||
# .hub/ is managed by the skills hub; rolling it back would break lockfile
|
||||
# invariants. .curator_backups is the backup dir itself — recursion bomb.
|
||||
_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}
|
||||
|
||||
# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
|
||||
# is portable (Windows-safe). An optional ``-NN`` suffix handles two
|
||||
# snapshots landing in the same wallclock second.
|
||||
_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
|
||||
|
||||
|
||||
def _backups_dir() -> Path:
|
||||
return get_hermes_home() / "skills" / ".curator_backups"
|
||||
|
||||
|
||||
def _skills_dir() -> Path:
|
||||
return get_hermes_home() / "skills"
|
||||
|
||||
|
||||
def _cron_jobs_file() -> Path:
|
||||
"""Source path for the live cron jobs store (``~/.hermes/cron/jobs.json``)."""
|
||||
return get_hermes_home() / "cron" / "jobs.json"
|
||||
|
||||
|
||||
CRON_JOBS_FILENAME = "cron-jobs.json"
|
||||
|
||||
|
||||
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
|
||||
"""Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.
|
||||
|
||||
Returns a small dict describing what was captured so the caller can
|
||||
fold it into the manifest. Never raises — if the cron file is missing
|
||||
or unreadable, the return dict has ``backed_up=False`` and the reason,
|
||||
and the snapshot proceeds without cron data (the snapshot is still
|
||||
useful for rolling back skills).
|
||||
"""
|
||||
src = _cron_jobs_file()
|
||||
info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
|
||||
if not src.exists():
|
||||
info["reason"] = "no cron/jobs.json present"
|
||||
return info
|
||||
try:
|
||||
raw = src.read_text(encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to read cron/jobs.json for backup: %s", e)
|
||||
info["reason"] = f"read error: {e}"
|
||||
return info
|
||||
# Count jobs as a nice diagnostic — but don't fail the snapshot if the
|
||||
# file is unparseable; just store the raw text and let rollback deal
|
||||
# with it (or not, if it's corrupted). jobs.json wraps the list as
|
||||
# `{"jobs": [...], "updated_at": ...}` — we count via that shape, and
|
||||
# fall back to bare-list shape just in case the format ever changes.
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
if isinstance(parsed, dict):
|
||||
inner = parsed.get("jobs")
|
||||
if isinstance(inner, list):
|
||||
info["jobs_count"] = len(inner)
|
||||
elif isinstance(parsed, list):
|
||||
info["jobs_count"] = len(parsed)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
info["jobs_count"] = 0
|
||||
info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
|
||||
try:
|
||||
(dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to write cron backup file: %s", e)
|
||||
info["reason"] = f"write error: {e}"
|
||||
return info
|
||||
info["backed_up"] = True
|
||||
return info
|
||||
|
||||
|
||||
def _utc_id(now: Optional[datetime] = None) -> str:
|
||||
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
# isoformat → "2026-05-01T13:05:42.123456+00:00"; strip subseconds and tz.
|
||||
s = now.replace(microsecond=0).isoformat()
|
||||
if s.endswith("+00:00"):
|
||||
s = s[:-6]
|
||||
return s.replace(":", "-") + "Z"
|
||||
|
||||
|
||||
def _load_config() -> Dict[str, Any]:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as e:
|
||||
logger.debug("Failed to load config for curator backup: %s", e)
|
||||
return {}
|
||||
if not isinstance(cfg, dict):
|
||||
return {}
|
||||
cur = cfg.get("curator") or {}
|
||||
if not isinstance(cur, dict):
|
||||
return {}
|
||||
bk = cur.get("backup") or {}
|
||||
return bk if isinstance(bk, dict) else {}
|
||||
|
||||
|
||||
def is_enabled() -> bool:
|
||||
"""Default ON — the whole point of the backup is safety by default."""
|
||||
return bool(_load_config().get("enabled", True))
|
||||
|
||||
|
||||
def get_keep() -> int:
|
||||
cfg = _load_config()
|
||||
try:
|
||||
n = int(cfg.get("keep", DEFAULT_KEEP))
|
||||
except (TypeError, ValueError):
|
||||
n = DEFAULT_KEEP
|
||||
return max(1, n)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
||||
skills_counted: int,
|
||||
cron_info: Optional[Dict[str, Any]] = None) -> None:
|
||||
manifest = {
|
||||
"id": dest.name,
|
||||
"reason": reason,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"archive": archive_path.name,
|
||||
"archive_bytes": archive_path.stat().st_size,
|
||||
"skill_files": skills_counted,
|
||||
}
|
||||
if cron_info is not None:
|
||||
manifest["cron_jobs"] = {
|
||||
"backed_up": bool(cron_info.get("backed_up", False)),
|
||||
"jobs_count": int(cron_info.get("jobs_count", 0)),
|
||||
}
|
||||
if not cron_info.get("backed_up"):
|
||||
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
|
||||
if cron_info.get("parse_warning"):
|
||||
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
|
||||
(dest / "manifest.json").write_text(
|
||||
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
|
||||
)
|
||||
|
||||
|
||||
def snapshot_skills(reason: str = "manual") -> Optional[Path]:
|
||||
"""Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.
|
||||
|
||||
Returns the snapshot directory path, or ``None`` if the snapshot was
|
||||
skipped (backup disabled, skills dir missing, or an IO error occurred —
|
||||
in which case we log at debug and return None so the curator never
|
||||
aborts a pass because of a backup failure).
|
||||
"""
|
||||
if not is_enabled():
|
||||
logger.debug("Curator backup disabled by config; skipping snapshot")
|
||||
return None
|
||||
|
||||
skills = _skills_dir()
|
||||
if not skills.exists():
|
||||
logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
|
||||
return None
|
||||
|
||||
backups = _backups_dir()
|
||||
try:
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create backups dir %s: %s", backups, e)
|
||||
return None
|
||||
|
||||
# Uniquify: if a snapshot with the same second already exists (can
|
||||
# happen if two curator runs fire in the same second), append a short
|
||||
# counter. Avoids clobbering and avoids timestamp collisions.
|
||||
base_id = _utc_id()
|
||||
snap_id = base_id
|
||||
counter = 1
|
||||
while (backups / snap_id).exists():
|
||||
snap_id = f"{base_id}-{counter:02d}"
|
||||
counter += 1
|
||||
|
||||
dest = backups / snap_id
|
||||
try:
|
||||
dest.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create snapshot dir %s: %s", dest, e)
|
||||
return None
|
||||
|
||||
archive = dest / "skills.tar.gz"
|
||||
try:
|
||||
# Stream into the tarball — no tempdir copy needed.
|
||||
with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
|
||||
for entry in sorted(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
# arcname: store paths relative to skills/ so extraction
|
||||
# drops cleanly back into the skills dir.
|
||||
tf.add(str(entry), arcname=entry.name, recursive=True)
|
||||
# Capture cron/jobs.json alongside the tarball. Never fails the
|
||||
# snapshot — the skills side is the core guarantee; cron is
|
||||
# additive. We still record in the manifest whether it was
|
||||
# captured so rollback can surface "no cron data in this snapshot".
|
||||
cron_info = _backup_cron_jobs_into(dest)
|
||||
_write_manifest(dest, reason, archive,
|
||||
_count_skill_files(skills),
|
||||
cron_info=cron_info)
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
logger.debug("Curator snapshot failed: %s", e, exc_info=True)
|
||||
# Clean up partial snapshot
|
||||
try:
|
||||
shutil.rmtree(dest, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
|
||||
_prune_old(keep=get_keep())
|
||||
logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
|
||||
return dest
|
||||
|
||||
|
||||
def _prune_old(keep: int) -> List[str]:
|
||||
"""Delete regular snapshots beyond the newest *keep*. Returns deleted
|
||||
ids. Staging dirs (``.rollback-staging-*``) are implementation detail
|
||||
and pruned independently on every call."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
entries: List[Tuple[str, Path]] = []
|
||||
stale_staging: List[Path] = []
|
||||
for child in backups.iterdir():
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if child.name.startswith(".rollback-staging-"):
|
||||
# Staging dirs are only supposed to exist briefly during a
|
||||
# rollback. If we find one here (e.g. from a crashed rollback),
|
||||
# clean it up opportunistically.
|
||||
stale_staging.append(child)
|
||||
continue
|
||||
if _ID_RE.match(child.name):
|
||||
entries.append((child.name, child))
|
||||
# Newest first (lexicographic works because the id is UTC ISO).
|
||||
entries.sort(key=lambda t: t[0], reverse=True)
|
||||
deleted: List[str] = []
|
||||
for _, path in entries[keep:]:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
deleted.append(path.name)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to prune %s: %s", path, e)
|
||||
for path in stale_staging:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to clean stale staging dir %s: %s", path, e)
|
||||
return deleted
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# List + rollback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
|
||||
mf = snap_dir / "manifest.json"
|
||||
if not mf.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(mf.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
|
||||
def list_backups() -> List[Dict[str, Any]]:
|
||||
"""Return all restorable snapshots, newest first. Only entries with a
|
||||
real ``skills.tar.gz`` tarball are listed — transient
|
||||
``.rollback-staging-*`` directories created mid-rollback are
|
||||
implementation detail and not shown."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
out: List[Dict[str, Any]] = []
|
||||
for child in sorted(backups.iterdir(), reverse=True):
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if not _ID_RE.match(child.name):
|
||||
continue
|
||||
if not (child / "skills.tar.gz").exists():
|
||||
continue
|
||||
mf = _read_manifest(child)
|
||||
mf.setdefault("id", child.name)
|
||||
mf.setdefault("path", str(child))
|
||||
if "archive_bytes" not in mf:
|
||||
arc = child / "skills.tar.gz"
|
||||
try:
|
||||
mf["archive_bytes"] = arc.stat().st_size
|
||||
except OSError:
|
||||
mf["archive_bytes"] = 0
|
||||
out.append(mf)
|
||||
return out
|
||||
|
||||
|
||||
def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
|
||||
"""Return the path of the requested backup, or the newest one if
|
||||
*backup_id* is None. Returns None if no match."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return None
|
||||
if backup_id:
|
||||
target = backups / backup_id
|
||||
if (
|
||||
target.is_dir()
|
||||
and _ID_RE.match(backup_id)
|
||||
and (target / "skills.tar.gz").exists()
|
||||
):
|
||||
return target
|
||||
return None
|
||||
candidates = [
|
||||
c for c in sorted(backups.iterdir(), reverse=True)
|
||||
if c.is_dir() and _ID_RE.match(c.name) and (c / "skills.tar.gz").exists()
|
||||
]
|
||||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
|
||||
"""Reconcile backed-up cron skill links into the live ``cron/jobs.json``.
|
||||
|
||||
We do NOT overwrite the whole cron file. Only the ``skills`` and
|
||||
``skill`` fields are restored, and only on jobs that still exist in the
|
||||
current file (matched by ``id``). Everything else about the job —
|
||||
schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
|
||||
is live state that the user/scheduler has modified since the snapshot;
|
||||
overwriting it would regress unrelated cron activity.
|
||||
|
||||
Rules:
|
||||
- Jobs present in backup AND live, with differing skills → skills restored.
|
||||
- Jobs present in backup AND live, with matching skills → no-op.
|
||||
- Jobs present in backup but gone from live (user deleted the job
|
||||
after the snapshot) → skipped, noted in the return report.
|
||||
- Jobs present in live but not in backup (user created a new cron
|
||||
job after the snapshot) → left untouched.
|
||||
|
||||
Never raises; failures are captured in the return dict. Writes through
|
||||
``cron.jobs`` to pick up the same lock + atomic-write path that tick()
|
||||
uses, so we don't race the scheduler.
|
||||
"""
|
||||
report: Dict[str, Any] = {
|
||||
"attempted": False,
|
||||
"restored": [],
|
||||
"skipped_missing": [],
|
||||
"unchanged": 0,
|
||||
"error": None,
|
||||
}
|
||||
backup_file = snapshot_dir / CRON_JOBS_FILENAME
|
||||
if not backup_file.exists():
|
||||
report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
|
||||
return report
|
||||
|
||||
try:
|
||||
backup_text = backup_file.read_text(encoding="utf-8")
|
||||
backup_parsed = json.loads(backup_text)
|
||||
except (OSError, json.JSONDecodeError) as e:
|
||||
report["error"] = f"failed to load backed-up jobs: {e}"
|
||||
return report
|
||||
# jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
|
||||
# that shape and a bare list for forward compat.
|
||||
if isinstance(backup_parsed, dict):
|
||||
backup_jobs = backup_parsed.get("jobs")
|
||||
elif isinstance(backup_parsed, list):
|
||||
backup_jobs = backup_parsed
|
||||
else:
|
||||
backup_jobs = None
|
||||
if not isinstance(backup_jobs, list):
|
||||
report["error"] = "backed-up cron-jobs.json has no jobs list"
|
||||
return report
|
||||
|
||||
# Build a lookup of the backed-up skill state keyed by job id.
|
||||
# We only need the two skill-ish fields (legacy single and modern list).
|
||||
backup_by_id: Dict[str, Dict[str, Any]] = {}
|
||||
for job in backup_jobs:
|
||||
if not isinstance(job, dict):
|
||||
continue
|
||||
jid = job.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
backup_by_id[jid] = {
|
||||
"skills": job.get("skills"),
|
||||
"skill": job.get("skill"),
|
||||
"name": job.get("name") or jid,
|
||||
}
|
||||
|
||||
if not backup_by_id:
|
||||
report["attempted"] = True # we tried but there was nothing to do
|
||||
return report
|
||||
|
||||
# Load and rewrite the live jobs under the scheduler's lock.
|
||||
try:
|
||||
from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
|
||||
except ImportError as e:
|
||||
report["error"] = f"cron module unavailable: {e}"
|
||||
return report
|
||||
|
||||
report["attempted"] = True
|
||||
try:
|
||||
with _jobs_file_lock:
|
||||
live_jobs = load_jobs()
|
||||
changed = False
|
||||
|
||||
live_ids = set()
|
||||
for live in live_jobs:
|
||||
if not isinstance(live, dict):
|
||||
continue
|
||||
jid = live.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
live_ids.add(jid)
|
||||
|
||||
backup = backup_by_id.get(jid)
|
||||
if backup is None:
|
||||
continue # live job didn't exist at snapshot time
|
||||
|
||||
cur_skills = live.get("skills")
|
||||
cur_skill = live.get("skill")
|
||||
bkp_skills = backup.get("skills")
|
||||
bkp_skill = backup.get("skill")
|
||||
|
||||
if cur_skills == bkp_skills and cur_skill == bkp_skill:
|
||||
report["unchanged"] += 1
|
||||
continue
|
||||
|
||||
# Restore. Preserve absence (don't force the key to appear
|
||||
# if the backup didn't have it either).
|
||||
if bkp_skills is None:
|
||||
live.pop("skills", None)
|
||||
else:
|
||||
live["skills"] = bkp_skills
|
||||
if bkp_skill is None:
|
||||
live.pop("skill", None)
|
||||
else:
|
||||
live["skill"] = bkp_skill
|
||||
|
||||
report["restored"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
"from": {"skills": cur_skills, "skill": cur_skill},
|
||||
"to": {"skills": bkp_skills, "skill": bkp_skill},
|
||||
})
|
||||
changed = True
|
||||
|
||||
# Jobs in backup but not in live = user deleted them after snapshot
|
||||
for jid, backup in backup_by_id.items():
|
||||
if jid not in live_ids:
|
||||
report["skipped_missing"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
})
|
||||
|
||||
if changed:
|
||||
save_jobs(live_jobs)
|
||||
except Exception as e: # noqa: BLE001 — rollback must not die mid-restore
|
||||
logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
|
||||
report["error"] = f"restore failed mid-flight: {e}"
|
||||
|
||||
return report
|
||||
|
||||
|
||||
|
||||
def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
|
||||
"""Restore ``~/.hermes/skills/`` from a snapshot.
|
||||
|
||||
Strategy:
|
||||
1. Resolve the target snapshot (explicit id or newest regular).
|
||||
2. Take a safety snapshot of the CURRENT skills tree under
|
||||
``.curator_backups/pre-rollback-<ts>/`` so the rollback itself is
|
||||
undoable.
|
||||
3. Move all current top-level entries (except ``.curator_backups``
|
||||
and ``.hub``) into a tempdir.
|
||||
4. Extract the chosen snapshot into ``~/.hermes/skills/``.
|
||||
5. On failure during 4, move the tempdir contents back (best-effort)
|
||||
and return failure.
|
||||
|
||||
Returns ``(ok, message, snapshot_path)``.
|
||||
"""
|
||||
target = _resolve_backup(backup_id)
|
||||
if target is None:
|
||||
return (
|
||||
False,
|
||||
f"no matching backup found"
|
||||
+ (f" for id '{backup_id}'" if backup_id else "")
|
||||
+ " (use `hermes curator rollback --list` to see available snapshots)",
|
||||
None,
|
||||
)
|
||||
archive = target / "skills.tar.gz"
|
||||
if not archive.exists():
|
||||
return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)
|
||||
|
||||
skills = _skills_dir()
|
||||
skills.mkdir(parents=True, exist_ok=True)
|
||||
backups = _backups_dir()
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Step 2: safety snapshot of current state FIRST. If this fails we bail
|
||||
# out before touching anything — otherwise a failed extract could leave
|
||||
# the user with no skills.
|
||||
try:
|
||||
snapshot_skills(reason=f"pre-rollback to {target.name}")
|
||||
except Exception as e:
|
||||
return (False, f"pre-rollback safety snapshot failed: {e}", None)
|
||||
|
||||
# Additionally move current entries into an internal staging dir so
|
||||
# the extract happens into an empty skills tree (predictable result).
|
||||
# This dir is implementation detail — not listed as a restorable
|
||||
# backup. The safety snapshot above is the user-facing undo handle.
|
||||
staged = backups / f".rollback-staging-{_utc_id()}"
|
||||
try:
|
||||
staged.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
return (False, f"failed to create staging dir: {e}", None)
|
||||
|
||||
moved: List[Tuple[Path, Path]] = []
|
||||
try:
|
||||
for entry in list(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
dest = staged / entry.name
|
||||
shutil.move(str(entry), str(dest))
|
||||
moved.append((entry, dest))
|
||||
except OSError as e:
|
||||
# Best-effort rollback of the move
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"failed to stage current skills: {e}", None)
|
||||
|
||||
# Step 4: extract the snapshot into skills/
|
||||
try:
|
||||
with tarfile.open(archive, "r:gz") as tf:
|
||||
# Python 3.12+ supports filter='data' for safer extraction.
|
||||
# Fall back to the unfiltered call for older interpreters but
|
||||
# still reject absolute paths and .. components defensively.
|
||||
for member in tf.getmembers():
|
||||
name = member.name
|
||||
if name.startswith("/") or ".." in Path(name).parts:
|
||||
raise tarfile.TarError(
|
||||
f"refusing to extract unsafe path: {name!r}"
|
||||
)
|
||||
try:
|
||||
tf.extractall(str(skills), filter="data") # type: ignore[call-arg]
|
||||
except TypeError:
|
||||
# Python < 3.12 — no filter kwarg
|
||||
tf.extractall(str(skills))
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
# Best-effort recover: move staged contents back
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"snapshot extract failed (state restored): {e}", None)
|
||||
|
||||
# Extract succeeded — the staging dir has served its purpose. The
|
||||
# user's undo handle is the safety snapshot tarball we took earlier.
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Reconcile cron skill-links. Surgical: only the skills/skill fields
|
||||
# on jobs matched by id. Everything else in jobs.json is live state
|
||||
# (schedule, next_run_at, enabled, prompt, etc.) and we leave it
|
||||
# alone. Failures here don't fail the overall rollback — the skills
|
||||
# tree is already restored, which is the main guarantee.
|
||||
cron_report = _restore_cron_skill_links(target)
|
||||
|
||||
summary_bits = [f"restored from snapshot {target.name}"]
|
||||
if cron_report.get("attempted"):
|
||||
restored_n = len(cron_report.get("restored") or [])
|
||||
skipped_n = len(cron_report.get("skipped_missing") or [])
|
||||
if cron_report.get("error"):
|
||||
summary_bits.append(f"cron links: error — {cron_report['error']}")
|
||||
elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
|
||||
# Attempted but nothing matched — empty snapshot or no overlapping ids.
|
||||
pass
|
||||
else:
|
||||
parts = []
|
||||
if restored_n:
|
||||
parts.append(f"{restored_n} job(s) had skill links restored")
|
||||
if skipped_n:
|
||||
parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
|
||||
if cron_report.get("unchanged"):
|
||||
parts.append(f"{cron_report['unchanged']} already matched")
|
||||
summary_bits.append("cron links: " + ", ".join(parts))
|
||||
|
||||
logger.info("Curator rollback: restored from %s (cron_report=%s)",
|
||||
target.name, cron_report)
|
||||
return (True, "; ".join(summary_bits), target)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Human-readable summary for CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def format_size(n: int) -> str:
|
||||
for unit in ("B", "KB", "MB", "GB"):
|
||||
if n < 1024 or unit == "GB":
|
||||
return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
|
||||
n /= 1024
|
||||
return f"{n:.1f} GB"
|
||||
|
||||
|
||||
def summarize_backups() -> str:
|
||||
rows = list_backups()
|
||||
if not rows:
|
||||
return "No curator snapshots yet."
|
||||
lines = [f"{'id':<24} {'reason':<40} {'skills':>6} {'size':>8}"]
|
||||
lines.append("─" * len(lines[0]))
|
||||
for r in rows:
|
||||
lines.append(
|
||||
f"{r.get('id','?'):<24} "
|
||||
f"{(r.get('reason','?') or '?')[:40]:<40} "
|
||||
f"{r.get('skill_files', 0):>6} "
|
||||
f"{format_size(int(r.get('archive_bytes', 0))):>8}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
@@ -55,6 +55,7 @@ class FailoverReason(enum.Enum):
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
|
||||
llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern" # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
@@ -470,6 +471,31 @@ def classify_api_error(
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
|
||||
# server to build GBNF tool-call parsers) rejects regex escape classes
|
||||
# like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
|
||||
# routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
|
||||
# email params. llama.cpp surfaces this as HTTP 400 with one of a few
|
||||
# recognizable phrases; on match we strip ``pattern``/``format`` from
|
||||
# ``self.tools`` in the retry loop and retry once. Cloud providers are
|
||||
# unaffected — they accept these keywords and we never hit this branch.
|
||||
if (
|
||||
status_code == 400
|
||||
and (
|
||||
"error parsing grammar" in error_msg
|
||||
or "json-schema-to-grammar" in error_msg
|
||||
or (
|
||||
"unable to generate parser" in error_msg
|
||||
and "template" in error_msg
|
||||
)
|
||||
)
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.llama_cpp_grammar_pattern,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
@@ -520,7 +546,12 @@ def classify_api_error(
|
||||
|
||||
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
|
||||
if is_disconnect and not status_code:
|
||||
is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
|
||||
# Absolute token/message-count thresholds are only a proxy for smaller
|
||||
# context windows. Large-context sessions can have hundreds of
|
||||
# messages while still being far below their actual token budget.
|
||||
is_large = approx_tokens > context_length * 0.6 or (
|
||||
context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200)
|
||||
)
|
||||
if is_large:
|
||||
return _result(
|
||||
FailoverReason.context_overflow,
|
||||
@@ -766,7 +797,12 @@ def _classify_400(
|
||||
if not err_body_msg:
|
||||
err_body_msg = str(body.get("message") or "").strip().lower()
|
||||
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
|
||||
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
|
||||
# Absolute token/message-count thresholds are only a proxy for smaller
|
||||
# context windows. Large-context sessions can have many messages while
|
||||
# still being far below their actual token budget.
|
||||
is_large = approx_tokens > context_length * 0.4 or (
|
||||
context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80)
|
||||
)
|
||||
|
||||
if is_generic and is_large:
|
||||
return result_fn(
|
||||
|
||||
@@ -679,7 +679,21 @@ def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices:
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
|
||||
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
|
||||
finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped)
|
||||
# Attach usage from this event's usageMetadata so the streaming
|
||||
# loop in run_agent.py can record token counts (mirrors the
|
||||
# non-streaming path in translate_gemini_response).
|
||||
usage_meta = event.get("usageMetadata") or {}
|
||||
if usage_meta:
|
||||
finish_chunk.usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
chunks.append(finish_chunk)
|
||||
return chunks
|
||||
|
||||
|
||||
|
||||
+15
-2
@@ -489,16 +489,29 @@ def save_credentials(creds: GoogleCredentials) -> Path:
|
||||
"""Atomically write creds to disk with 0o600 permissions."""
|
||||
path = _credentials_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
|
||||
# On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
|
||||
try:
|
||||
os.chmod(path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"
|
||||
|
||||
with _credentials_lock():
|
||||
tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}")
|
||||
try:
|
||||
with open(tmp_path, "w", encoding="utf-8") as fh:
|
||||
# Create with 0o600 atomically to close the TOCTOU window where the
|
||||
# default umask (often 0o644) would briefly expose tokens to other
|
||||
# local users between open() and chmod().
|
||||
fd = os.open(
|
||||
str(tmp_path),
|
||||
os.O_WRONLY | os.O_CREAT | os.O_EXCL,
|
||||
stat.S_IRUSR | stat.S_IWUSR,
|
||||
)
|
||||
with os.fdopen(fd, "w", encoding="utf-8") as fh:
|
||||
fh.write(payload)
|
||||
fh.flush()
|
||||
os.fsync(fh.fileno())
|
||||
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
atomic_replace(tmp_path, path)
|
||||
finally:
|
||||
try:
|
||||
|
||||
+233
@@ -0,0 +1,233 @@
|
||||
"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
|
||||
|
||||
Scope (thin slice, by design): only the highest-impact static strings shown
|
||||
to the user by Hermes itself -- approval prompts, a handful of gateway slash
|
||||
command replies, restart-drain notices. Agent-generated output, log lines,
|
||||
error tracebacks, tool outputs, and slash-command descriptions all stay in
|
||||
English.
|
||||
|
||||
Catalog files live under ``locales/<lang>.yaml`` at the repo root. Each
|
||||
catalog is a flat dict keyed by dotted paths (e.g. ``approval.choose`` or
|
||||
``gateway.approval_expired``). Missing keys fall back to English; if English
|
||||
is missing too, the key path itself is returned so a broken catalog never
|
||||
crashes the agent.
|
||||
|
||||
Usage::
|
||||
|
||||
from agent.i18n import t
|
||||
print(t("approval.choose_long")) # current lang
|
||||
print(t("gateway.draining", count=3)) # {count} formatted
|
||||
print(t("approval.choose_long", lang="zh")) # explicit override
|
||||
|
||||
Language resolution order:
|
||||
1. Explicit ``lang=`` argument passed to :func:`t`
|
||||
2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
|
||||
3. ``display.language`` from config.yaml
|
||||
4. ``"en"`` (baseline)
|
||||
|
||||
Supported languages: en, zh, ja, de, es, fr, tr, uk. Unknown values fall back to en.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_LANGUAGES: tuple[str, ...] = ("en", "zh", "ja", "de", "es", "fr", "tr", "uk")
|
||||
DEFAULT_LANGUAGE = "en"
|
||||
|
||||
# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
|
||||
# get the right catalog instead of silently falling back to English.
|
||||
_LANGUAGE_ALIASES: dict[str, str] = {
|
||||
"english": "en", "en-us": "en", "en-gb": "en",
|
||||
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-tw": "zh", "zh-hans": "zh", "zh-hant": "zh",
|
||||
"japanese": "ja", "jp": "ja", "ja-jp": "ja",
|
||||
"german": "de", "deutsch": "de", "de-de": "de",
|
||||
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es",
|
||||
"french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
|
||||
"ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
|
||||
"turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
|
||||
}
|
||||
|
||||
_catalog_cache: dict[str, dict[str, str]] = {}
|
||||
_catalog_lock = threading.Lock()
|
||||
|
||||
|
||||
def _locales_dir() -> Path:
|
||||
"""Return the directory containing locale YAML files.
|
||||
|
||||
Lives next to the repo root so both the bundled install and editable
|
||||
checkouts find it without PYTHONPATH gymnastics.
|
||||
"""
|
||||
# agent/i18n.py -> agent/ -> repo root
|
||||
return Path(__file__).resolve().parent.parent / "locales"
|
||||
|
||||
|
||||
def _normalize_lang(value: Any) -> str:
|
||||
"""Normalize a user-supplied language value to a supported code.
|
||||
|
||||
Accepts supported codes directly, common aliases (``chinese`` -> ``zh``),
|
||||
and case-insensitive regional tags (``zh-CN`` -> ``zh``). Returns the
|
||||
default language for unknown values.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return DEFAULT_LANGUAGE
|
||||
key = value.strip().lower()
|
||||
if not key:
|
||||
return DEFAULT_LANGUAGE
|
||||
if key in SUPPORTED_LANGUAGES:
|
||||
return key
|
||||
if key in _LANGUAGE_ALIASES:
|
||||
return _LANGUAGE_ALIASES[key]
|
||||
# Try stripping a region suffix (e.g. "pt-br" -> "pt" won't be supported,
|
||||
# but "zh-CN" -> "zh" will).
|
||||
base = key.split("-", 1)[0]
|
||||
if base in SUPPORTED_LANGUAGES:
|
||||
return base
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def _load_catalog(lang: str) -> dict[str, str]:
|
||||
"""Load and flatten one locale YAML file into a dotted-key dict.
|
||||
|
||||
YAML files can be nested for human readability; this produces the flat
|
||||
key space :func:`t` expects. Cached per-language for the process.
|
||||
"""
|
||||
with _catalog_lock:
|
||||
cached = _catalog_cache.get(lang)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
path = _locales_dir() / f"{lang}.yaml"
|
||||
if not path.is_file():
|
||||
logger.debug("i18n catalog missing for %s at %s", lang, path)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
try:
|
||||
import yaml # PyYAML is already a hermes dependency
|
||||
with path.open("r", encoding="utf-8") as f:
|
||||
raw = yaml.safe_load(f) or {}
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to load i18n catalog %s: %s", path, exc)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
flat: dict[str, str] = {}
|
||||
_flatten_into(raw, "", flat)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = flat
|
||||
return flat
|
||||
|
||||
|
||||
def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
child_key = f"{prefix}.{key}" if prefix else str(key)
|
||||
_flatten_into(value, child_key, out)
|
||||
elif isinstance(node, str):
|
||||
out[prefix] = node
|
||||
# Non-string, non-dict leaves are ignored -- catalogs are text-only.
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _config_language_cached() -> str | None:
|
||||
"""Read ``display.language`` from config.yaml once per process.
|
||||
|
||||
Cached because ``t()`` is called in hot paths (every approval prompt,
|
||||
every gateway reply) and re-reading YAML each call would be wasteful.
|
||||
``reset_language_cache()`` clears this when config changes at runtime
|
||||
(e.g. after the setup wizard).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
lang = (cfg.get("display") or {}).get("language")
|
||||
if lang:
|
||||
return _normalize_lang(lang)
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read display.language from config: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def reset_language_cache() -> None:
|
||||
"""Invalidate cached language resolution and catalogs.
|
||||
|
||||
Call after :func:`hermes_cli.config.save_config` if a running process
|
||||
needs to pick up a changed ``display.language`` without restart.
|
||||
"""
|
||||
_config_language_cached.cache_clear()
|
||||
with _catalog_lock:
|
||||
_catalog_cache.clear()
|
||||
|
||||
|
||||
def get_language() -> str:
|
||||
"""Resolve the active language using env > config > default order."""
|
||||
env_lang = os.environ.get("HERMES_LANGUAGE")
|
||||
if env_lang:
|
||||
return _normalize_lang(env_lang)
|
||||
cfg_lang = _config_language_cached()
|
||||
if cfg_lang:
|
||||
return cfg_lang
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
|
||||
"""Translate a dotted key to the active language.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
key
|
||||
Dotted path into the catalog, e.g. ``"approval.choose_long"``.
|
||||
lang
|
||||
Explicit language override. Takes precedence over env + config.
|
||||
**format_kwargs
|
||||
``str.format`` substitution arguments (``t("gateway.drain", count=3)``
|
||||
expects a catalog entry with a ``{count}`` placeholder).
|
||||
|
||||
Returns
|
||||
-------
|
||||
The translated string, or the English fallback if the key is missing in
|
||||
the target language, or the bare key if English is also missing.
|
||||
"""
|
||||
target = _normalize_lang(lang) if lang else get_language()
|
||||
catalog = _load_catalog(target)
|
||||
value = catalog.get(key)
|
||||
|
||||
if value is None and target != DEFAULT_LANGUAGE:
|
||||
# Fall through to English rather than showing a key path to the user.
|
||||
value = _load_catalog(DEFAULT_LANGUAGE).get(key)
|
||||
|
||||
if value is None:
|
||||
# Last-ditch: return the key itself. A broken catalog should not
|
||||
# crash anything; it just looks ugly until someone fixes it.
|
||||
logger.debug("i18n miss: key=%r lang=%r", key, target)
|
||||
value = key
|
||||
|
||||
if format_kwargs:
|
||||
try:
|
||||
return value.format(**format_kwargs)
|
||||
except (KeyError, IndexError, ValueError) as exc:
|
||||
logger.warning(
|
||||
"i18n format failed for key=%r lang=%r kwargs=%r: %s",
|
||||
key, target, format_kwargs, exc,
|
||||
)
|
||||
return value
|
||||
return value
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SUPPORTED_LANGUAGES",
|
||||
"DEFAULT_LANGUAGE",
|
||||
"t",
|
||||
"get_language",
|
||||
"reset_language_cache",
|
||||
]
|
||||
@@ -20,25 +20,25 @@ def summarize_manual_compression(
|
||||
headline = f"No changes from compression: {before_count} messages"
|
||||
if after_tokens == before_tokens:
|
||||
token_line = (
|
||||
f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
|
||||
f"Approx request size: ~{before_tokens:,} tokens (unchanged)"
|
||||
)
|
||||
else:
|
||||
token_line = (
|
||||
f"Rough transcript estimate: ~{before_tokens:,} → "
|
||||
f"Approx request size: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
else:
|
||||
headline = f"Compressed: {before_count} → {after_count} messages"
|
||||
token_line = (
|
||||
f"Rough transcript estimate: ~{before_tokens:,} → "
|
||||
f"Approx request size: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
|
||||
note = None
|
||||
if not noop and after_count < before_count and after_tokens > before_tokens:
|
||||
note = (
|
||||
"Note: fewer messages can still raise this rough transcript estimate "
|
||||
"when compression rewrites the transcript into denser summaries."
|
||||
"Note: fewer messages can still raise this estimate when "
|
||||
"compression rewrites the transcript into denser summaries."
|
||||
)
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
"""MemoryManager — orchestrates the built-in memory provider plus at most
|
||||
ONE external plugin memory provider.
|
||||
"""MemoryManager — orchestrates memory providers for the agent.
|
||||
|
||||
Single integration point in run_agent.py. Replaces scattered per-backend
|
||||
code with one manager that delegates to registered providers.
|
||||
|
||||
The BuiltinMemoryProvider is always registered first and cannot be removed.
|
||||
Only ONE external (non-builtin) provider is allowed at a time — attempting
|
||||
to register a second external provider is rejected with a warning. This
|
||||
Only ONE external plugin provider is allowed at a time — attempting to
|
||||
register a second external provider is rejected with a warning. This
|
||||
prevents tool schema bloat and conflicting memory backends.
|
||||
|
||||
Usage in run_agent.py:
|
||||
self._memory_manager = MemoryManager()
|
||||
self._memory_manager.add_provider(BuiltinMemoryProvider(...))
|
||||
# Only ONE of these:
|
||||
self._memory_manager.add_provider(plugin_provider)
|
||||
|
||||
@@ -49,7 +46,7 @@ _INTERNAL_CONTEXT_RE = re.compile(
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_INTERNAL_NOTE_RE = re.compile(
|
||||
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
|
||||
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as (?:informational background data|authoritative reference data[^\]]*)\.\]\s*',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
@@ -183,7 +180,8 @@ def build_memory_context_block(raw_context: str) -> str:
|
||||
return (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, "
|
||||
"NOT new user input. Treat as informational background data.]\n\n"
|
||||
"NOT new user input. Treat as authoritative reference data — "
|
||||
"this is the agent's persistent memory and should inform all responses.]\n\n"
|
||||
f"{clean}\n"
|
||||
"</memory-context>"
|
||||
)
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
"""Abstract base class for pluggable memory providers.
|
||||
|
||||
Memory providers give the agent persistent recall across sessions. One
|
||||
external provider is active at a time alongside the always-on built-in
|
||||
memory (MEMORY.md / USER.md). The MemoryManager enforces this limit.
|
||||
Memory providers give the agent persistent recall across sessions.
|
||||
The MemoryManager enforces a one-external-provider limit to prevent
|
||||
tool schema bloat and conflicting memory backends.
|
||||
|
||||
Built-in memory is always active as the first provider and cannot be removed.
|
||||
External providers (Honcho, Hindsight, Mem0, etc.) are additive — they never
|
||||
disable the built-in store. Only one external provider runs at a time to
|
||||
prevent tool schema bloat and conflicting memory backends.
|
||||
External providers (Honcho, Hindsight, Mem0, etc.) are registered
|
||||
and managed via MemoryManager. Only one external provider runs at a
|
||||
time.
|
||||
|
||||
Registration:
|
||||
1. Built-in: BuiltinMemoryProvider — always present, not removable.
|
||||
2. Plugins: Ship in plugins/memory/<name>/, activated by memory.provider config.
|
||||
Plugins ship in plugins/memory/<name>/ and are activated via
|
||||
the memory.provider config key.
|
||||
|
||||
Lifecycle (called by MemoryManager, wired in run_agent.py):
|
||||
initialize() — connect, create resources, warm up
|
||||
|
||||
@@ -318,6 +318,17 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"ollama.com": "ollama-cloud",
|
||||
}
|
||||
|
||||
# Auto-extend with hostnames derived from provider profiles.
|
||||
# Any provider with a base_url not already in the map gets added automatically.
|
||||
try:
|
||||
from providers import list_providers as _list_providers
|
||||
for _pp in _list_providers():
|
||||
_host = _pp.get_hostname()
|
||||
if _host and _host not in _URL_TO_PROVIDER:
|
||||
_URL_TO_PROVIDER[_host] = _pp.name
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _infer_provider_from_url(base_url: str) -> Optional[str]:
|
||||
"""Infer the models.dev provider name from a base URL.
|
||||
|
||||
@@ -81,15 +81,56 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
||||
return repaired
|
||||
|
||||
# Rule 2: when anyOf is present, type belongs only on the children.
|
||||
# Additionally, Moonshot rejects null-type branches inside anyOf
|
||||
# (enum value (<nil>) does not match any type in [string]).
|
||||
# Collapse the anyOf to the first non-null branch and infer its type.
|
||||
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
|
||||
repaired.pop("type", None)
|
||||
return repaired
|
||||
non_null = [b for b in repaired["anyOf"]
|
||||
if isinstance(b, dict) and b.get("type") != "null"]
|
||||
if non_null and len(non_null) < len(repaired["anyOf"]):
|
||||
# Drop the anyOf wrapper — keep only the non-null branch.
|
||||
# If there's a single non-null branch, promote it and fall
|
||||
# through to Rules 1/3 so nullable/enum cleanup still applies
|
||||
# to the merged node.
|
||||
if len(non_null) == 1:
|
||||
merge = {k: v for k, v in repaired.items() if k != "anyOf"}
|
||||
merge.update(non_null[0])
|
||||
repaired = merge
|
||||
else:
|
||||
repaired["anyOf"] = non_null
|
||||
return repaired
|
||||
else:
|
||||
# Nothing to collapse — parent type stripped, children already
|
||||
# repaired by the recursive walk above.
|
||||
return repaired
|
||||
|
||||
# Moonshot also rejects non-standard keywords like ``nullable`` on
|
||||
# parameter schemas — strip it.
|
||||
repaired.pop("nullable", None)
|
||||
|
||||
# Rule 1: property schemas without type need one. $ref nodes are exempt
|
||||
# — their type comes from the referenced definition.
|
||||
if "$ref" in repaired:
|
||||
return repaired
|
||||
return _fill_missing_type(repaired)
|
||||
# Fill missing type BEFORE Rule 3 so enum cleanup can check the type.
|
||||
if "$ref" not in repaired:
|
||||
repaired = _fill_missing_type(repaired)
|
||||
|
||||
# Rule 3: Moonshot rejects null/empty-string values inside enum arrays
|
||||
# when the parent type is a scalar (string, integer, etc.). The error:
|
||||
# "enum value (<nil>) does not match any type in [string]"
|
||||
# Strip null and empty-string from enum values, and if the enum becomes
|
||||
# empty, drop it entirely.
|
||||
if "enum" in repaired and isinstance(repaired["enum"], list):
|
||||
node_type = repaired.get("type")
|
||||
if node_type in ("string", "integer", "number", "boolean"):
|
||||
cleaned = [v for v in repaired["enum"]
|
||||
if v is not None and v != ""]
|
||||
if cleaned:
|
||||
repaired["enum"] = cleaned
|
||||
else:
|
||||
repaired.pop("enum")
|
||||
|
||||
return repaired
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
||||
@@ -182,6 +182,64 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# Kanban task execution protocol\n"
|
||||
"You have been assigned ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
@@ -455,6 +513,12 @@ PLATFORM_HINTS = {
|
||||
"image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
|
||||
"— when a sticker is the right response, use yb_send_sticker."
|
||||
),
|
||||
"api_server": (
|
||||
"You're responding through an API server. The rendering layer is unknown — "
|
||||
"assume plain text. No markdown formatting (no asterisks, bullets, headers, "
|
||||
"code fences). Treat this like a conversation, not a document. Keep responses "
|
||||
"brief and natural."
|
||||
),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
+17
-11
@@ -305,13 +305,18 @@ def _redact_form_body(text: str) -> str:
|
||||
return _redact_query_string(text.strip())
|
||||
|
||||
|
||||
def redact_sensitive_text(text: str, *, force: bool = False) -> str:
|
||||
def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
Safe to call on any string -- non-matching text passes through unchanged.
|
||||
Disabled by default — enable via security.redact_secrets: true in config.yaml.
|
||||
Set force=True for safety boundaries that must never return raw secrets
|
||||
regardless of the user's global logging redaction preference.
|
||||
|
||||
Set code_file=True to skip the ENV-assignment and JSON-field regex
|
||||
patterns when the text is known to be source code (e.g. MAX_TOKENS=***
|
||||
constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
|
||||
private keys, DB connstrings, JWTs, and URL secrets are still redacted.
|
||||
"""
|
||||
if text is None:
|
||||
return None
|
||||
@@ -325,17 +330,18 @@ def redact_sensitive_text(text: str, *, force: bool = False) -> str:
|
||||
# Known prefixes (sk-, ghp_, etc.)
|
||||
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
||||
|
||||
# ENV assignments: OPENAI_API_KEY=sk-abc...
|
||||
def _redact_env(m):
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
|
||||
if not code_file:
|
||||
def _redact_env(m):
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
|
||||
# JSON fields: "apiKey": "value"
|
||||
def _redact_json(m):
|
||||
key, value = m.group(1), m.group(2)
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
# JSON fields: "apiKey": "***" (skip for code files — false positives)
|
||||
def _redact_json(m):
|
||||
key, value = m.group(1), m.group(2)
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
|
||||
# Authorization headers
|
||||
text = _AUTH_HEADER_RE.sub(
|
||||
|
||||
+38
-3
@@ -6,6 +6,7 @@ can invoke skills via /skill-name commands.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
@@ -20,10 +21,35 @@ from agent.skill_preprocessing import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_skill_commands_platform: Optional[str] = None
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
|
||||
def _resolve_skill_commands_platform() -> Optional[str]:
|
||||
"""Return the current platform scope used for disabled-skill filtering.
|
||||
|
||||
Used to detect when the active platform has shifted so
|
||||
:func:`get_skill_commands` can drop a stale cache that was populated
|
||||
for a different platform's ``skills.platform_disabled`` view (#14536).
|
||||
|
||||
Resolves from (in order) ``HERMES_PLATFORM`` env var and
|
||||
``HERMES_SESSION_PLATFORM`` from the gateway session context. Returns
|
||||
``None`` when no platform scope is active (e.g. classic CLI, RL
|
||||
rollouts, standalone scripts).
|
||||
"""
|
||||
try:
|
||||
from gateway.session_context import get_session_env
|
||||
|
||||
resolved_platform = (
|
||||
os.getenv("HERMES_PLATFORM")
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
)
|
||||
except Exception:
|
||||
resolved_platform = os.getenv("HERMES_PLATFORM")
|
||||
return resolved_platform or None
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
@@ -218,7 +244,8 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
Returns:
|
||||
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
|
||||
"""
|
||||
global _skill_commands
|
||||
global _skill_commands, _skill_commands_platform
|
||||
_skill_commands_platform = _resolve_skill_commands_platform()
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
@@ -278,8 +305,16 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
|
||||
|
||||
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
"""Return the current skill commands mapping (scan first if empty)."""
|
||||
if not _skill_commands:
|
||||
"""Return the current skill commands mapping (scan first if empty).
|
||||
|
||||
Rescans when the active platform scope changes (e.g. a gateway
|
||||
process serving Telegram and Discord concurrently) so each platform
|
||||
sees its own ``skills.platform_disabled`` view (#14536).
|
||||
"""
|
||||
if (
|
||||
not _skill_commands
|
||||
or _skill_commands_platform != _resolve_skill_commands_platform()
|
||||
):
|
||||
scan_skill_commands()
|
||||
return _skill_commands
|
||||
|
||||
|
||||
@@ -0,0 +1,386 @@
|
||||
"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.
|
||||
|
||||
``run_agent._strip_think_blocks`` is regex-based and correct for a complete
|
||||
string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
|
||||
the state that downstream consumers (CLI ``_stream_delta``, gateway
|
||||
``GatewayStreamConsumer._filter_and_accumulate``) rely on.
|
||||
|
||||
Concretely, when MiniMax-M2.7 streams
|
||||
|
||||
delta1 = "<think>"
|
||||
delta2 = "Let me check their config"
|
||||
delta3 = "</think>"
|
||||
|
||||
the per-delta regex erases delta1 entirely (case 2: unterminated-open at
|
||||
boundary matches ``^<think>...``), so the downstream state machine never
|
||||
sees the open tag, treats delta2 as regular content, and leaks reasoning
|
||||
to the user. Consumers that don't run their own state machine (ACP,
|
||||
api_server, TTS) never had any defence at all — they just emitted
|
||||
whatever survived the upstream regex.
|
||||
|
||||
This module centralises the tag-suppression state machine at the
|
||||
upstream layer so every stream_delta_callback sees text that has
|
||||
already had reasoning blocks removed. Partial tags at delta
|
||||
boundaries are held back until the next delta resolves them, and
|
||||
end-of-stream flushing surfaces any held-back prose that turned out
|
||||
not to be a real tag.
|
||||
|
||||
Usage::
|
||||
|
||||
scrubber = StreamingThinkScrubber()
|
||||
for delta in stream:
|
||||
visible = scrubber.feed(delta)
|
||||
if visible:
|
||||
emit(visible)
|
||||
tail = scrubber.flush() # at end of stream
|
||||
if tail:
|
||||
emit(tail)
|
||||
|
||||
The scrubber is re-entrant per agent instance. Call ``reset()`` at
|
||||
the top of each new turn so a hung block from an interrupted prior
|
||||
stream cannot taint the next turn's output.
|
||||
|
||||
Tag variants handled (case-insensitive):
|
||||
``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
|
||||
``<REASONING_SCRATCHPAD>``.
|
||||
|
||||
Block-boundary rule for opens: an opening tag is only treated as a
|
||||
reasoning-block opener when it appears at the start of the stream,
|
||||
after a newline (optionally followed by whitespace), or when only
|
||||
whitespace has been emitted on the current line. This prevents prose
|
||||
that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
|
||||
being incorrectly suppressed. Closed pairs (``<think>X</think>``) are
|
||||
always suppressed regardless of boundary; a closed pair is an
|
||||
intentional, bounded construct.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
__all__ = ["StreamingThinkScrubber"]
|
||||
|
||||
|
||||
class StreamingThinkScrubber:
|
||||
"""Stateful scrubber for streaming reasoning/thinking blocks.
|
||||
|
||||
State machine:
|
||||
- ``_in_block``: True while inside an opened block, waiting for
|
||||
a close tag. All text inside is discarded.
|
||||
- ``_buf``: held-back partial-tag tail. Emitted / discarded on
|
||||
the next ``feed()`` call or by ``flush()``.
|
||||
- ``_last_emitted_ended_newline``: True iff the most recent
|
||||
emission to the consumer ended with ``\\n``, or nothing has
|
||||
been emitted yet (start-of-stream counts as a boundary). Used
|
||||
to decide whether an open tag at buffer position 0 is at a
|
||||
block boundary.
|
||||
"""
|
||||
|
||||
_OPEN_TAG_NAMES: Tuple[str, ...] = (
|
||||
"think",
|
||||
"thinking",
|
||||
"reasoning",
|
||||
"thought",
|
||||
"REASONING_SCRATCHPAD",
|
||||
)
|
||||
|
||||
# Materialise literal tag strings so the hot path does string
|
||||
# operations, not regex compilation per feed().
|
||||
_OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)
|
||||
_CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)
|
||||
|
||||
# Pre-compute the longest tag (for partial-tag hold-back bound).
|
||||
_MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._in_block: bool = False
|
||||
self._buf: str = ""
|
||||
self._last_emitted_ended_newline: bool = True
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Reset all state. Call at the top of every new turn."""
|
||||
self._in_block = False
|
||||
self._buf = ""
|
||||
self._last_emitted_ended_newline = True
|
||||
|
||||
def feed(self, text: str) -> str:
|
||||
"""Feed one delta; return the scrubbed visible portion.
|
||||
|
||||
May return an empty string when the entire delta is reasoning
|
||||
content or is being held back pending resolution of a partial
|
||||
tag at the boundary.
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
buf = self._buf + text
|
||||
self._buf = ""
|
||||
out: list[str] = []
|
||||
|
||||
while buf:
|
||||
if self._in_block:
|
||||
# Hunt for the earliest close tag.
|
||||
close_idx, close_len = self._find_first_tag(
|
||||
buf, self._CLOSE_TAGS,
|
||||
)
|
||||
if close_idx == -1:
|
||||
# No close yet — hold back a potential partial
|
||||
# close-tag prefix; discard everything else.
|
||||
held = self._max_partial_suffix(buf, self._CLOSE_TAGS)
|
||||
self._buf = buf[-held:] if held else ""
|
||||
return "".join(out)
|
||||
# Found close: discard block content + tag, continue.
|
||||
buf = buf[close_idx + close_len:]
|
||||
self._in_block = False
|
||||
else:
|
||||
# Priority 1 — closed <tag>X</tag> pair anywhere in
|
||||
# buf. Closed pairs are always an intentional,
|
||||
# bounded construct (even mid-line prose containing
|
||||
# an open/close pair is almost certainly a model
|
||||
# leaking reasoning inline), so no boundary gating.
|
||||
pair = self._find_earliest_closed_pair(buf)
|
||||
# Priority 2 — unterminated open tag at a block
|
||||
# boundary. Boundary-gated so prose that mentions
|
||||
# '<think>' isn't over-stripped.
|
||||
open_idx, open_len = self._find_open_at_boundary(
|
||||
buf, out,
|
||||
)
|
||||
|
||||
# Pick whichever match comes earliest in the buffer.
|
||||
if pair is not None and (
|
||||
open_idx == -1 or pair[0] <= open_idx
|
||||
):
|
||||
start_idx, end_idx = pair
|
||||
preceding = buf[:start_idx]
|
||||
if preceding:
|
||||
preceding = self._strip_orphan_close_tags(preceding)
|
||||
if preceding:
|
||||
out.append(preceding)
|
||||
self._last_emitted_ended_newline = (
|
||||
preceding.endswith("\n")
|
||||
)
|
||||
buf = buf[end_idx:]
|
||||
continue
|
||||
|
||||
if open_idx != -1:
|
||||
# Unterminated open at boundary — emit preceding,
|
||||
# enter block, continue loop with remainder.
|
||||
preceding = buf[:open_idx]
|
||||
if preceding:
|
||||
preceding = self._strip_orphan_close_tags(preceding)
|
||||
if preceding:
|
||||
out.append(preceding)
|
||||
self._last_emitted_ended_newline = (
|
||||
preceding.endswith("\n")
|
||||
)
|
||||
self._in_block = True
|
||||
buf = buf[open_idx + open_len:]
|
||||
continue
|
||||
|
||||
# No resolvable tag structure in buf. Hold back any
|
||||
# partial-tag prefix at the tail so a split tag
|
||||
# across deltas isn't missed, then emit the rest.
|
||||
held = self._max_partial_suffix(buf, self._OPEN_TAGS)
|
||||
held_close = self._max_partial_suffix(
|
||||
buf, self._CLOSE_TAGS,
|
||||
)
|
||||
held = max(held, held_close)
|
||||
if held:
|
||||
emit_text = buf[:-held]
|
||||
self._buf = buf[-held:]
|
||||
else:
|
||||
emit_text = buf
|
||||
self._buf = ""
|
||||
if emit_text:
|
||||
emit_text = self._strip_orphan_close_tags(emit_text)
|
||||
if emit_text:
|
||||
out.append(emit_text)
|
||||
self._last_emitted_ended_newline = (
|
||||
emit_text.endswith("\n")
|
||||
)
|
||||
return "".join(out)
|
||||
|
||||
return "".join(out)
|
||||
|
||||
def flush(self) -> str:
|
||||
"""End-of-stream flush.
|
||||
|
||||
If still inside an unterminated block, held-back content is
|
||||
discarded — leaking partial reasoning is worse than a
|
||||
truncated answer. Otherwise the held-back partial-tag tail is
|
||||
emitted verbatim (it turned out not to be a real tag prefix).
|
||||
"""
|
||||
if self._in_block:
|
||||
self._buf = ""
|
||||
self._in_block = False
|
||||
return ""
|
||||
tail = self._buf
|
||||
self._buf = ""
|
||||
if not tail:
|
||||
return ""
|
||||
tail = self._strip_orphan_close_tags(tail)
|
||||
if tail:
|
||||
self._last_emitted_ended_newline = tail.endswith("\n")
|
||||
return tail
|
||||
|
||||
# ── internal helpers ───────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def _find_first_tag(
|
||||
buf: str, tags: Tuple[str, ...],
|
||||
) -> Tuple[int, int]:
|
||||
"""Return (earliest_index, tag_length) over *tags*, or (-1, 0).
|
||||
|
||||
Case-insensitive match.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best_idx = -1
|
||||
best_len = 0
|
||||
for tag in tags:
|
||||
idx = buf_lower.find(tag.lower())
|
||||
if idx != -1 and (best_idx == -1 or idx < best_idx):
|
||||
best_idx = idx
|
||||
best_len = len(tag)
|
||||
return best_idx, best_len
|
||||
|
||||
def _find_earliest_closed_pair(self, buf: str):
|
||||
"""Return (start_idx, end_idx) of the earliest closed pair, else None.
|
||||
|
||||
A closed pair is ``<tag>...</tag>`` of any variant. Matches are
|
||||
case-insensitive and non-greedy (the closest close tag after
|
||||
an open tag wins), matching the regex ``<tag>.*?</tag>``
|
||||
semantics of ``_strip_think_blocks`` case 1. When two tag
|
||||
variants could both match, the one whose open tag appears
|
||||
earlier wins.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best: "tuple[int, int] | None" = None
|
||||
for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):
|
||||
open_lower = open_tag.lower()
|
||||
close_lower = close_tag.lower()
|
||||
open_idx = buf_lower.find(open_lower)
|
||||
if open_idx == -1:
|
||||
continue
|
||||
close_idx = buf_lower.find(
|
||||
close_lower, open_idx + len(open_lower),
|
||||
)
|
||||
if close_idx == -1:
|
||||
continue
|
||||
end_idx = close_idx + len(close_lower)
|
||||
if best is None or open_idx < best[0]:
|
||||
best = (open_idx, end_idx)
|
||||
return best
|
||||
|
||||
def _find_open_at_boundary(
|
||||
self, buf: str, already_emitted: list[str],
|
||||
) -> Tuple[int, int]:
|
||||
"""Return the earliest block-boundary open-tag (idx, len).
|
||||
|
||||
Returns (-1, 0) if no boundary-legal opener is present.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best_idx = -1
|
||||
best_len = 0
|
||||
for tag in self._OPEN_TAGS:
|
||||
tag_lower = tag.lower()
|
||||
search_start = 0
|
||||
while True:
|
||||
idx = buf_lower.find(tag_lower, search_start)
|
||||
if idx == -1:
|
||||
break
|
||||
if self._is_block_boundary(buf, idx, already_emitted):
|
||||
if best_idx == -1 or idx < best_idx:
|
||||
best_idx = idx
|
||||
best_len = len(tag)
|
||||
break # first boundary hit for this tag is enough
|
||||
search_start = idx + 1
|
||||
return best_idx, best_len
|
||||
|
||||
def _is_block_boundary(
|
||||
self, buf: str, idx: int, already_emitted: list[str],
|
||||
) -> bool:
|
||||
"""True iff position *idx* in *buf* is a block boundary.
|
||||
|
||||
A block boundary is:
|
||||
- buf position 0 AND the most recent emission ended with
|
||||
a newline (or nothing has been emitted yet)
|
||||
- any position whose preceding text on the current line
|
||||
(since the last newline in buf) is whitespace-only, AND
|
||||
if there is no newline in the preceding buf portion, the
|
||||
most recent prior emission ended with a newline
|
||||
"""
|
||||
if idx == 0:
|
||||
# Check whether the last already-emitted chunk in THIS
|
||||
# feed() call ended with a newline, otherwise fall back
|
||||
# to the cross-feed flag.
|
||||
if already_emitted:
|
||||
return already_emitted[-1].endswith("\n")
|
||||
return self._last_emitted_ended_newline
|
||||
preceding = buf[:idx]
|
||||
last_nl = preceding.rfind("\n")
|
||||
if last_nl == -1:
|
||||
# No newline in buf before the tag — boundary only if the
|
||||
# prior emission ended with a newline AND everything since
|
||||
# is whitespace.
|
||||
if already_emitted:
|
||||
prior_newline = already_emitted[-1].endswith("\n")
|
||||
else:
|
||||
prior_newline = self._last_emitted_ended_newline
|
||||
return prior_newline and preceding.strip() == ""
|
||||
# Newline present — text between it and the tag must be
|
||||
# whitespace-only.
|
||||
return preceding[last_nl + 1:].strip() == ""
|
||||
|
||||
@classmethod
|
||||
def _max_partial_suffix(
|
||||
cls, buf: str, tags: Tuple[str, ...],
|
||||
) -> int:
|
||||
"""Return the longest buf-suffix that is a prefix of any tag.
|
||||
|
||||
Only prefixes strictly shorter than the tag itself count
|
||||
(full-length suffixes are the tag and are handled as matches,
|
||||
not held-back partials). Case-insensitive.
|
||||
"""
|
||||
if not buf:
|
||||
return 0
|
||||
buf_lower = buf.lower()
|
||||
max_check = min(len(buf_lower), cls._MAX_TAG_LEN - 1)
|
||||
for i in range(max_check, 0, -1):
|
||||
suffix = buf_lower[-i:]
|
||||
for tag in tags:
|
||||
tag_lower = tag.lower()
|
||||
if len(tag_lower) > i and tag_lower.startswith(suffix):
|
||||
return i
|
||||
return 0
|
||||
|
||||
@classmethod
|
||||
def _strip_orphan_close_tags(cls, text: str) -> str:
|
||||
"""Remove any close tags from *text* (orphan-close handling).
|
||||
|
||||
An orphan close tag has no matching open in the current
|
||||
scrubber state; it's always noise, stripped with any trailing
|
||||
whitespace so the surrounding prose flows naturally.
|
||||
"""
|
||||
if "</" not in text:
|
||||
return text
|
||||
text_lower = text.lower()
|
||||
out: list[str] = []
|
||||
i = 0
|
||||
while i < len(text):
|
||||
matched = False
|
||||
if text_lower[i:i + 2] == "</":
|
||||
for tag in cls._CLOSE_TAGS:
|
||||
tag_lower = tag.lower()
|
||||
tag_len = len(tag_lower)
|
||||
if text_lower[i:i + tag_len] == tag_lower:
|
||||
# Skip the tag and any trailing whitespace,
|
||||
# matching _strip_think_blocks case 3.
|
||||
j = i + tag_len
|
||||
while j < len(text) and text[j] in " \t\n\r":
|
||||
j += 1
|
||||
i = j
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
out.append(text[i])
|
||||
i += 1
|
||||
return "".join(out)
|
||||
@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
|
||||
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
|
||||
# become visible instead of piling up as NULL session titles.
|
||||
FailureCallback = Callable[[str, BaseException], None]
|
||||
TitleCallback = Callable[[str], None]
|
||||
|
||||
_TITLE_PROMPT = (
|
||||
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
|
||||
@@ -90,6 +91,7 @@ def auto_title_session(
|
||||
assistant_response: str,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
title_callback: Optional[TitleCallback] = None,
|
||||
) -> None:
|
||||
"""Generate and set a session title if one doesn't already exist.
|
||||
|
||||
@@ -119,6 +121,11 @@ def auto_title_session(
|
||||
try:
|
||||
session_db.set_session_title(session_id, title)
|
||||
logger.debug("Auto-generated session title: %s", title)
|
||||
if title_callback is not None:
|
||||
try:
|
||||
title_callback(title)
|
||||
except Exception:
|
||||
logger.debug("Auto-title callback failed", exc_info=True)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to set auto-generated title: %s", e)
|
||||
|
||||
@@ -131,6 +138,7 @@ def maybe_auto_title(
|
||||
conversation_history: list,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
title_callback: Optional[TitleCallback] = None,
|
||||
) -> None:
|
||||
"""Fire-and-forget title generation after the first exchange.
|
||||
|
||||
@@ -152,7 +160,11 @@ def maybe_auto_title(
|
||||
thread = threading.Thread(
|
||||
target=auto_title_session,
|
||||
args=(session_db, session_id, user_message, assistant_response),
|
||||
kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
|
||||
kwargs={
|
||||
"failure_callback": failure_callback,
|
||||
"main_runtime": main_runtime,
|
||||
"title_callback": title_callback,
|
||||
},
|
||||
daemon=True,
|
||||
name="auto-title",
|
||||
)
|
||||
|
||||
@@ -0,0 +1,455 @@
|
||||
"""Pure tool-call loop guardrail primitives.
|
||||
|
||||
The controller in this module is intentionally side-effect free: it tracks
|
||||
per-turn tool-call observations and returns decisions. Runtime code owns whether
|
||||
those decisions become warning guidance, synthetic tool results, or controlled
|
||||
turn halts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Mapping
|
||||
|
||||
from utils import safe_json_loads
|
||||
|
||||
|
||||
IDEMPOTENT_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"read_file",
|
||||
"search_files",
|
||||
"web_search",
|
||||
"web_extract",
|
||||
"session_search",
|
||||
"browser_snapshot",
|
||||
"browser_console",
|
||||
"browser_get_images",
|
||||
"mcp_filesystem_read_file",
|
||||
"mcp_filesystem_read_text_file",
|
||||
"mcp_filesystem_read_multiple_files",
|
||||
"mcp_filesystem_list_directory",
|
||||
"mcp_filesystem_list_directory_with_sizes",
|
||||
"mcp_filesystem_directory_tree",
|
||||
"mcp_filesystem_get_file_info",
|
||||
"mcp_filesystem_search_files",
|
||||
}
|
||||
)
|
||||
|
||||
MUTATING_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"terminal",
|
||||
"execute_code",
|
||||
"write_file",
|
||||
"patch",
|
||||
"todo",
|
||||
"memory",
|
||||
"skill_manage",
|
||||
"browser_click",
|
||||
"browser_type",
|
||||
"browser_press",
|
||||
"browser_scroll",
|
||||
"browser_navigate",
|
||||
"send_message",
|
||||
"cronjob",
|
||||
"delegate_task",
|
||||
"process",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolCallGuardrailConfig:
|
||||
"""Thresholds for per-turn tool-call loop detection.
|
||||
|
||||
Warnings are enabled by default and never prevent tool execution. Hard stops
|
||||
are explicit opt-in so interactive CLI/TUI sessions get a gentle nudge unless
|
||||
the user enables circuit-breaker behavior in config.yaml.
|
||||
"""
|
||||
|
||||
warnings_enabled: bool = True
|
||||
hard_stop_enabled: bool = False
|
||||
exact_failure_warn_after: int = 2
|
||||
exact_failure_block_after: int = 5
|
||||
same_tool_failure_warn_after: int = 3
|
||||
same_tool_failure_halt_after: int = 8
|
||||
no_progress_warn_after: int = 2
|
||||
no_progress_block_after: int = 5
|
||||
idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
|
||||
mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)
|
||||
|
||||
@classmethod
|
||||
def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
|
||||
"""Build config from the `tool_loop_guardrails` config.yaml section."""
|
||||
if not isinstance(data, Mapping):
|
||||
return cls()
|
||||
|
||||
warn_after = data.get("warn_after")
|
||||
if not isinstance(warn_after, Mapping):
|
||||
warn_after = {}
|
||||
hard_stop_after = data.get("hard_stop_after")
|
||||
if not isinstance(hard_stop_after, Mapping):
|
||||
hard_stop_after = {}
|
||||
|
||||
defaults = cls()
|
||||
return cls(
|
||||
warnings_enabled=_as_bool(data.get("warnings_enabled"), defaults.warnings_enabled),
|
||||
hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), defaults.hard_stop_enabled),
|
||||
exact_failure_warn_after=_positive_int(
|
||||
warn_after.get("exact_failure", data.get("exact_failure_warn_after")),
|
||||
defaults.exact_failure_warn_after,
|
||||
),
|
||||
same_tool_failure_warn_after=_positive_int(
|
||||
warn_after.get("same_tool_failure", data.get("same_tool_failure_warn_after")),
|
||||
defaults.same_tool_failure_warn_after,
|
||||
),
|
||||
no_progress_warn_after=_positive_int(
|
||||
warn_after.get("idempotent_no_progress", data.get("no_progress_warn_after")),
|
||||
defaults.no_progress_warn_after,
|
||||
),
|
||||
exact_failure_block_after=_positive_int(
|
||||
hard_stop_after.get("exact_failure", data.get("exact_failure_block_after")),
|
||||
defaults.exact_failure_block_after,
|
||||
),
|
||||
same_tool_failure_halt_after=_positive_int(
|
||||
hard_stop_after.get("same_tool_failure", data.get("same_tool_failure_halt_after")),
|
||||
defaults.same_tool_failure_halt_after,
|
||||
),
|
||||
no_progress_block_after=_positive_int(
|
||||
hard_stop_after.get("idempotent_no_progress", data.get("no_progress_block_after")),
|
||||
defaults.no_progress_block_after,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolCallSignature:
|
||||
"""Stable, non-reversible identity for a tool name plus canonical args."""
|
||||
|
||||
tool_name: str
|
||||
args_hash: str
|
||||
|
||||
@classmethod
|
||||
def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
|
||||
canonical = canonical_tool_args(args or {})
|
||||
return cls(tool_name=tool_name, args_hash=_sha256(canonical))
|
||||
|
||||
def to_metadata(self) -> dict[str, str]:
|
||||
"""Return public metadata without raw argument values."""
|
||||
return {"tool_name": self.tool_name, "args_hash": self.args_hash}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolGuardrailDecision:
|
||||
"""Decision returned by the tool-call guardrail controller."""
|
||||
|
||||
action: str = "allow" # allow | warn | block | halt
|
||||
code: str = "allow"
|
||||
message: str = ""
|
||||
tool_name: str = ""
|
||||
count: int = 0
|
||||
signature: ToolCallSignature | None = None
|
||||
|
||||
@property
|
||||
def allows_execution(self) -> bool:
|
||||
return self.action in {"allow", "warn"}
|
||||
|
||||
@property
|
||||
def should_halt(self) -> bool:
|
||||
return self.action in {"block", "halt"}
|
||||
|
||||
def to_metadata(self) -> dict[str, Any]:
|
||||
data: dict[str, Any] = {
|
||||
"action": self.action,
|
||||
"code": self.code,
|
||||
"message": self.message,
|
||||
"tool_name": self.tool_name,
|
||||
"count": self.count,
|
||||
}
|
||||
if self.signature is not None:
|
||||
data["signature"] = self.signature.to_metadata()
|
||||
return data
|
||||
|
||||
|
||||
def canonical_tool_args(args: Mapping[str, Any]) -> str:
|
||||
"""Return sorted compact JSON for parsed tool arguments."""
|
||||
if not isinstance(args, Mapping):
|
||||
raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
|
||||
return json.dumps(
|
||||
args,
|
||||
ensure_ascii=False,
|
||||
sort_keys=True,
|
||||
separators=(",", ":"),
|
||||
default=str,
|
||||
)
|
||||
|
||||
|
||||
def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
|
||||
"""Safety-fallback classifier used only when callers don't pass ``failed``.
|
||||
|
||||
Mirrors ``agent.display._detect_tool_failure`` exactly so the guardrail
|
||||
never disagrees with the CLI's user-visible ``[error]`` tag. Production
|
||||
callers in ``run_agent.py`` always pass an explicit ``failed=`` derived
|
||||
from ``_detect_tool_failure``; this function exists so standalone callers
|
||||
(tests, tooling) still get consistent behavior.
|
||||
"""
|
||||
if result is None:
|
||||
return False, ""
|
||||
|
||||
if tool_name == "terminal":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
exit_code = data.get("exit_code")
|
||||
if exit_code is not None and exit_code != 0:
|
||||
return True, f" [exit {exit_code}]"
|
||||
return False, ""
|
||||
|
||||
if tool_name == "memory":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
|
||||
return True, " [full]"
|
||||
|
||||
lower = result[:500].lower()
|
||||
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
|
||||
return True, " [error]"
|
||||
|
||||
return False, ""
|
||||
|
||||
|
||||
class ToolCallGuardrailController:
|
||||
"""Per-turn controller for repeated failed/non-progressing tool calls."""
|
||||
|
||||
def __init__(self, config: ToolCallGuardrailConfig | None = None):
|
||||
self.config = config or ToolCallGuardrailConfig()
|
||||
self.reset_for_turn()
|
||||
|
||||
def reset_for_turn(self) -> None:
|
||||
self._exact_failure_counts: dict[ToolCallSignature, int] = {}
|
||||
self._same_tool_failure_counts: dict[str, int] = {}
|
||||
self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
|
||||
self._halt_decision: ToolGuardrailDecision | None = None
|
||||
|
||||
@property
|
||||
def halt_decision(self) -> ToolGuardrailDecision | None:
|
||||
return self._halt_decision
|
||||
|
||||
def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
|
||||
signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
|
||||
if not self.config.hard_stop_enabled:
|
||||
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
|
||||
|
||||
exact_count = self._exact_failure_counts.get(signature, 0)
|
||||
if exact_count >= self.config.exact_failure_block_after:
|
||||
decision = ToolGuardrailDecision(
|
||||
action="block",
|
||||
code="repeated_exact_failure_block",
|
||||
message=(
|
||||
f"Blocked {tool_name}: the same tool call failed {exact_count} "
|
||||
"times with identical arguments. Stop retrying it unchanged; "
|
||||
"change strategy or explain the blocker."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=exact_count,
|
||||
signature=signature,
|
||||
)
|
||||
self._halt_decision = decision
|
||||
return decision
|
||||
|
||||
if self._is_idempotent(tool_name):
|
||||
record = self._no_progress.get(signature)
|
||||
if record is not None:
|
||||
_result_hash, repeat_count = record
|
||||
if repeat_count >= self.config.no_progress_block_after:
|
||||
decision = ToolGuardrailDecision(
|
||||
action="block",
|
||||
code="idempotent_no_progress_block",
|
||||
message=(
|
||||
f"Blocked {tool_name}: this read-only call returned the same "
|
||||
f"result {repeat_count} times. Stop repeating it unchanged; "
|
||||
"use the result already provided or try a different query."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=repeat_count,
|
||||
signature=signature,
|
||||
)
|
||||
self._halt_decision = decision
|
||||
return decision
|
||||
|
||||
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
|
||||
|
||||
def after_call(
|
||||
self,
|
||||
tool_name: str,
|
||||
args: Mapping[str, Any] | None,
|
||||
result: str | None,
|
||||
*,
|
||||
failed: bool | None = None,
|
||||
) -> ToolGuardrailDecision:
|
||||
args = _coerce_args(args)
|
||||
signature = ToolCallSignature.from_call(tool_name, args)
|
||||
if failed is None:
|
||||
failed, _ = classify_tool_failure(tool_name, result)
|
||||
|
||||
if failed:
|
||||
exact_count = self._exact_failure_counts.get(signature, 0) + 1
|
||||
self._exact_failure_counts[signature] = exact_count
|
||||
self._no_progress.pop(signature, None)
|
||||
|
||||
same_count = self._same_tool_failure_counts.get(tool_name, 0) + 1
|
||||
self._same_tool_failure_counts[tool_name] = same_count
|
||||
|
||||
if self.config.hard_stop_enabled and same_count >= self.config.same_tool_failure_halt_after:
|
||||
decision = ToolGuardrailDecision(
|
||||
action="halt",
|
||||
code="same_tool_failure_halt",
|
||||
message=(
|
||||
f"Stopped {tool_name}: it failed {same_count} times this turn. "
|
||||
"Stop retrying the same failing tool path and choose a different approach."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=same_count,
|
||||
signature=signature,
|
||||
)
|
||||
self._halt_decision = decision
|
||||
return decision
|
||||
|
||||
if self.config.warnings_enabled and exact_count >= self.config.exact_failure_warn_after:
|
||||
return ToolGuardrailDecision(
|
||||
action="warn",
|
||||
code="repeated_exact_failure_warning",
|
||||
message=(
|
||||
f"{tool_name} has failed {exact_count} times with identical arguments. "
|
||||
"This looks like a loop; inspect the error and change strategy "
|
||||
"instead of retrying it unchanged."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=exact_count,
|
||||
signature=signature,
|
||||
)
|
||||
|
||||
if self.config.warnings_enabled and same_count >= self.config.same_tool_failure_warn_after:
|
||||
return ToolGuardrailDecision(
|
||||
action="warn",
|
||||
code="same_tool_failure_warning",
|
||||
message=(
|
||||
f"{tool_name} has failed {same_count} times this turn. "
|
||||
"This looks like a loop; change approach before retrying."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=same_count,
|
||||
signature=signature,
|
||||
)
|
||||
|
||||
return ToolGuardrailDecision(tool_name=tool_name, count=exact_count, signature=signature)
|
||||
|
||||
self._exact_failure_counts.pop(signature, None)
|
||||
self._same_tool_failure_counts.pop(tool_name, None)
|
||||
|
||||
if not self._is_idempotent(tool_name):
|
||||
self._no_progress.pop(signature, None)
|
||||
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
|
||||
|
||||
result_hash = _result_hash(result)
|
||||
previous = self._no_progress.get(signature)
|
||||
repeat_count = 1
|
||||
if previous is not None and previous[0] == result_hash:
|
||||
repeat_count = previous[1] + 1
|
||||
self._no_progress[signature] = (result_hash, repeat_count)
|
||||
|
||||
if self.config.warnings_enabled and repeat_count >= self.config.no_progress_warn_after:
|
||||
return ToolGuardrailDecision(
|
||||
action="warn",
|
||||
code="idempotent_no_progress_warning",
|
||||
message=(
|
||||
f"{tool_name} returned the same result {repeat_count} times. "
|
||||
"Use the result already provided or change the query instead of "
|
||||
"repeating it unchanged."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=repeat_count,
|
||||
signature=signature,
|
||||
)
|
||||
|
||||
return ToolGuardrailDecision(tool_name=tool_name, count=repeat_count, signature=signature)
|
||||
|
||||
def _is_idempotent(self, tool_name: str) -> bool:
|
||||
if tool_name in self.config.mutating_tools:
|
||||
return False
|
||||
return tool_name in self.config.idempotent_tools
|
||||
|
||||
|
||||
def toolguard_synthetic_result(decision: ToolGuardrailDecision) -> str:
|
||||
"""Build a synthetic role=tool content string for a blocked tool call."""
|
||||
return json.dumps(
|
||||
{
|
||||
"error": decision.message,
|
||||
"guardrail": decision.to_metadata(),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> str:
|
||||
"""Append runtime guidance to the current tool result content."""
|
||||
if decision.action not in {"warn", "halt"} or not decision.message:
|
||||
return result
|
||||
label = "Tool loop hard stop" if decision.action == "halt" else "Tool loop warning"
|
||||
suffix = (
|
||||
f"\n\n[{label}: "
|
||||
f"{decision.code}; count={decision.count}; {decision.message}]"
|
||||
)
|
||||
return (result or "") + suffix
|
||||
|
||||
|
||||
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
|
||||
return args if isinstance(args, Mapping) else {}
|
||||
|
||||
|
||||
def _result_hash(result: str | None) -> str:
|
||||
parsed = safe_json_loads(result or "")
|
||||
if parsed is not None:
|
||||
try:
|
||||
canonical = json.dumps(
|
||||
parsed,
|
||||
ensure_ascii=False,
|
||||
sort_keys=True,
|
||||
separators=(",", ":"),
|
||||
default=str,
|
||||
)
|
||||
except TypeError:
|
||||
canonical = str(parsed)
|
||||
else:
|
||||
canonical = result or ""
|
||||
return _sha256(canonical)
|
||||
|
||||
|
||||
def _as_bool(value: Any, default: bool) -> bool:
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, (int, float)):
|
||||
return bool(value)
|
||||
if isinstance(value, str):
|
||||
lowered = value.strip().lower()
|
||||
if lowered in {"1", "true", "yes", "on", "enabled"}:
|
||||
return True
|
||||
if lowered in {"0", "false", "no", "off", "disabled"}:
|
||||
return False
|
||||
return default
|
||||
|
||||
|
||||
def _positive_int(value: Any, default: int) -> int:
|
||||
if value is None:
|
||||
return default
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return parsed if parsed >= 1 else default
|
||||
|
||||
|
||||
def _sha256(value: str) -> str:
|
||||
return hashlib.sha256(value.encode("utf-8")).hexdigest()
|
||||
@@ -6,9 +6,16 @@ Usage:
|
||||
result = transport.normalize_response(raw_response)
|
||||
"""
|
||||
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
|
||||
from agent.transports.types import (
|
||||
NormalizedResponse,
|
||||
ToolCall,
|
||||
Usage,
|
||||
build_tool_call,
|
||||
map_finish_reason,
|
||||
) # noqa: F401
|
||||
|
||||
_REGISTRY: dict = {}
|
||||
_discovered: bool = False
|
||||
|
||||
|
||||
def register_transport(api_mode: str, transport_cls: type) -> None:
|
||||
@@ -23,6 +30,9 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
global _discovered
|
||||
if not _discovered:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
@@ -38,6 +48,8 @@ def get_transport(api_mode: str):
|
||||
|
||||
def _discover_transports() -> None:
|
||||
"""Import all transport modules to trigger auto-registration."""
|
||||
global _discovered
|
||||
_discovered = True
|
||||
try:
|
||||
import agent.transports.anthropic # noqa: F401
|
||||
except ImportError:
|
||||
|
||||
@@ -109,7 +109,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def api_mode(self) -> str:
|
||||
return "chat_completions"
|
||||
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
def convert_messages(
|
||||
self, messages: list[dict[str, Any]], **kwargs
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
@@ -126,7 +128,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
|
||||
if isinstance(tc, dict) and (
|
||||
"call_id" in tc or "response_item_id" in tc
|
||||
):
|
||||
needs_sanitize = True
|
||||
break
|
||||
if needs_sanitize:
|
||||
@@ -149,39 +153,41 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
tc.pop("response_item_id", None)
|
||||
return sanitized
|
||||
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Tools are already in OpenAI format — identity."""
|
||||
return tools
|
||||
|
||||
def build_kwargs(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Optional[List[Dict[str, Any]]] = None,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
**params,
|
||||
) -> Dict[str, Any]:
|
||||
) -> dict[str, Any]:
|
||||
"""Build chat.completions.create() kwargs.
|
||||
|
||||
This is the most complex transport method — it handles ~16 providers
|
||||
via params rather than subclasses.
|
||||
|
||||
params:
|
||||
params (all optional):
|
||||
timeout: float — API call timeout
|
||||
max_tokens: int | None — user-configured max tokens
|
||||
ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
|
||||
ephemeral_max_output_tokens: int | None — one-shot override
|
||||
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
|
||||
reasoning_config: dict | None
|
||||
request_overrides: dict | None
|
||||
session_id: str | None
|
||||
qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
|
||||
model_lower: str — lowercase model name for pattern matching
|
||||
# Provider detection flags (all optional, default False)
|
||||
# Provider profile path (all per-provider quirks live in providers/)
|
||||
provider_profile: ProviderProfile | None — when present, delegates to
|
||||
_build_kwargs_from_profile(); all flag params below are bypassed.
|
||||
# Legacy-path flags — only used when provider_profile is None
|
||||
# (i.e. custom / unregistered providers). Known providers all go
|
||||
# through provider_profile.
|
||||
is_openrouter: bool
|
||||
is_nous: bool
|
||||
is_qwen_portal: bool
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_tokenhub: bool
|
||||
is_lmstudio: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
@@ -190,6 +196,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# Qwen-specific
|
||||
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
|
||||
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
|
||||
qwen_session_metadata: dict | None
|
||||
# Temperature
|
||||
fixed_temperature: Any — from _fixed_temperature_for_model()
|
||||
omit_temperature: bool
|
||||
@@ -199,28 +206,21 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
# Extra
|
||||
extra_body_additions: dict | None — pre-built extra_body entries
|
||||
extra_body_additions: dict | None
|
||||
"""
|
||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
||||
sanitized = self.convert_messages(messages)
|
||||
|
||||
# Qwen portal prep AFTER codex sanitization. If sanitize already
|
||||
# deepcopied, reuse that copy via the in-place variant to avoid a
|
||||
# second deepcopy.
|
||||
is_qwen = params.get("is_qwen_portal", False)
|
||||
if is_qwen:
|
||||
qwen_prep = params.get("qwen_prepare_fn")
|
||||
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
|
||||
if sanitized is messages:
|
||||
if qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
else:
|
||||
# Already deepcopied — transform in place
|
||||
if qwen_prep_inplace is not None:
|
||||
qwen_prep_inplace(sanitized)
|
||||
elif qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
# ── Provider profile: single-path when present ──────────────────
|
||||
_profile = params.get("provider_profile")
|
||||
if _profile:
|
||||
return self._build_kwargs_from_profile(
|
||||
_profile, model, sanitized, tools, params
|
||||
)
|
||||
|
||||
# ── Legacy fallback (unregistered / unknown provider) ───────────
|
||||
# Reached only when get_provider_profile() returned None.
|
||||
# Known providers always go through the profile path above.
|
||||
|
||||
# Developer role swap for GPT-5/Codex models
|
||||
model_lower = params.get("model_lower", (model or "").lower())
|
||||
@@ -233,7 +233,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
sanitized = list(sanitized)
|
||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||
|
||||
api_kwargs: Dict[str, Any] = {
|
||||
api_kwargs: dict[str, Any] = {
|
||||
"model": model,
|
||||
"messages": sanitized,
|
||||
}
|
||||
@@ -242,19 +242,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if timeout is not None:
|
||||
api_kwargs["timeout"] = timeout
|
||||
|
||||
# Temperature
|
||||
fixed_temp = params.get("fixed_temperature")
|
||||
omit_temp = params.get("omit_temperature", False)
|
||||
if omit_temp:
|
||||
api_kwargs.pop("temperature", None)
|
||||
elif fixed_temp is not None:
|
||||
api_kwargs["temperature"] = fixed_temp
|
||||
|
||||
# Qwen metadata (caller precomputes {sessionId, promptId})
|
||||
qwen_meta = params.get("qwen_session_metadata")
|
||||
if qwen_meta and is_qwen:
|
||||
api_kwargs["metadata"] = qwen_meta
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
@@ -278,13 +265,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||
elif max_tokens is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(max_tokens))
|
||||
elif is_nvidia_nim and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(16384))
|
||||
elif is_qwen and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(65536))
|
||||
elif is_kimi and max_tokens_fn:
|
||||
# Kimi/Moonshot: 32000 matches Kimi CLI's default
|
||||
api_kwargs.update(max_tokens_fn(32000))
|
||||
elif anthropic_max_out is not None:
|
||||
api_kwargs["max_tokens"] = anthropic_max_out
|
||||
|
||||
@@ -331,7 +311,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
api_kwargs["reasoning_effort"] = _lm_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: Dict[str, Any] = {}
|
||||
extra_body: dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
@@ -361,35 +341,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if gh_reasoning is not None:
|
||||
extra_body["reasoning"] = gh_reasoning
|
||||
else:
|
||||
if reasoning_config is not None:
|
||||
rc = dict(reasoning_config)
|
||||
if is_nous and rc.get("enabled") is False:
|
||||
pass # omit for Nous when disabled
|
||||
else:
|
||||
extra_body["reasoning"] = rc
|
||||
else:
|
||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||
|
||||
if is_nous:
|
||||
extra_body["tags"] = ["product=hermes-agent"]
|
||||
|
||||
# Ollama num_ctx
|
||||
ollama_ctx = params.get("ollama_num_ctx")
|
||||
if ollama_ctx:
|
||||
options = extra_body.get("options", {})
|
||||
options["num_ctx"] = ollama_ctx
|
||||
extra_body["options"] = options
|
||||
|
||||
# Ollama/custom think=false
|
||||
if params.get("is_custom_provider", False):
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_effort = (reasoning_config.get("effort") or "").strip().lower()
|
||||
_enabled = reasoning_config.get("enabled", True)
|
||||
if _effort == "none" or _enabled is False:
|
||||
extra_body["think"] = False
|
||||
|
||||
if is_qwen:
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||
|
||||
if provider_name == "gemini":
|
||||
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
@@ -423,6 +375,120 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
|
||||
"""Build API kwargs using a ProviderProfile — single path, no legacy flags.
|
||||
|
||||
This method replaces the entire flag-based kwargs assembly when a
|
||||
provider_profile is passed. Every quirk comes from the profile object.
|
||||
"""
|
||||
from providers.base import OMIT_TEMPERATURE
|
||||
|
||||
# Message preprocessing
|
||||
sanitized = profile.prepare_messages(sanitized)
|
||||
|
||||
# Developer role swap — model-name-based, applies to all providers
|
||||
_model_lower = (model or "").lower()
|
||||
if (
|
||||
sanitized
|
||||
and isinstance(sanitized[0], dict)
|
||||
and sanitized[0].get("role") == "system"
|
||||
and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
|
||||
):
|
||||
sanitized = list(sanitized)
|
||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||
|
||||
api_kwargs: dict[str, Any] = {
|
||||
"model": model,
|
||||
"messages": sanitized,
|
||||
}
|
||||
|
||||
# Temperature
|
||||
if profile.fixed_temperature is OMIT_TEMPERATURE:
|
||||
pass # Don't include temperature at all
|
||||
elif profile.fixed_temperature is not None:
|
||||
api_kwargs["temperature"] = profile.fixed_temperature
|
||||
else:
|
||||
# Use caller's temperature if provided
|
||||
temp = params.get("temperature")
|
||||
if temp is not None:
|
||||
api_kwargs["temperature"] = temp
|
||||
|
||||
# Timeout
|
||||
timeout = params.get("timeout")
|
||||
if timeout is not None:
|
||||
api_kwargs["timeout"] = timeout
|
||||
|
||||
# Tools — apply Moonshot/Kimi schema sanitization regardless of path
|
||||
if tools:
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > profile default
|
||||
max_tokens_fn = params.get("max_tokens_param_fn")
|
||||
ephemeral = params.get("ephemeral_max_output_tokens")
|
||||
user_max = params.get("max_tokens")
|
||||
anthropic_max = params.get("anthropic_max_output")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||
elif user_max is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(user_max))
|
||||
elif profile.default_max_tokens and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
|
||||
elif anthropic_max is not None:
|
||||
api_kwargs["max_tokens"] = anthropic_max
|
||||
|
||||
# Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
extra_body_from_profile, top_level_from_profile = (
|
||||
profile.build_api_kwargs_extras(
|
||||
reasoning_config=reasoning_config,
|
||||
supports_reasoning=params.get("supports_reasoning", False),
|
||||
qwen_session_metadata=params.get("qwen_session_metadata"),
|
||||
model=model,
|
||||
ollama_num_ctx=params.get("ollama_num_ctx"),
|
||||
)
|
||||
)
|
||||
api_kwargs.update(top_level_from_profile)
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: dict[str, Any] = {}
|
||||
|
||||
# Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
|
||||
profile_body = profile.build_extra_body(
|
||||
session_id=params.get("session_id"),
|
||||
provider_preferences=params.get("provider_preferences"),
|
||||
model=model,
|
||||
base_url=params.get("base_url"),
|
||||
reasoning_config=reasoning_config,
|
||||
)
|
||||
if profile_body:
|
||||
extra_body.update(profile_body)
|
||||
|
||||
# Profile's reasoning/thinking extra_body entries
|
||||
if extra_body_from_profile:
|
||||
extra_body.update(extra_body_from_profile)
|
||||
|
||||
# Merge any pre-built extra_body additions from the caller
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
extra_body.update(additions)
|
||||
|
||||
# Request overrides (user config)
|
||||
overrides = params.get("request_overrides")
|
||||
if overrides:
|
||||
for k, v in overrides.items():
|
||||
if k == "extra_body" and isinstance(v, dict):
|
||||
extra_body.update(v)
|
||||
else:
|
||||
api_kwargs[k] = v
|
||||
|
||||
if extra_body:
|
||||
api_kwargs["extra_body"] = extra_body
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
|
||||
|
||||
@@ -444,7 +510,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# Gemini 3 thinking models attach extra_content with
|
||||
# thought_signature — without replay on the next turn the API
|
||||
# rejects the request with 400.
|
||||
tc_provider_data: Dict[str, Any] = {}
|
||||
tc_provider_data: dict[str, Any] = {}
|
||||
extra = getattr(tc, "extra_content", None)
|
||||
if extra is None and hasattr(tc, "model_extra"):
|
||||
extra = (tc.model_extra or {}).get("extra_content")
|
||||
@@ -455,12 +521,14 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
except Exception:
|
||||
pass
|
||||
tc_provider_data["extra_content"] = extra
|
||||
tool_calls.append(ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
provider_data=tc_provider_data or None,
|
||||
))
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
provider_data=tc_provider_data or None,
|
||||
)
|
||||
)
|
||||
|
||||
usage = None
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
@@ -477,9 +545,13 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# so keep them apart in provider_data rather than merging.
|
||||
reasoning = getattr(msg, "reasoning", None)
|
||||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
if reasoning_content is None and hasattr(msg, "model_extra"):
|
||||
model_extra = getattr(msg, "model_extra", None) or {}
|
||||
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
|
||||
reasoning_content = model_extra["reasoning_content"]
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content:
|
||||
if reasoning_content is not None:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
@@ -504,7 +576,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
return False
|
||||
return True
|
||||
|
||||
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
|
||||
def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
|
||||
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
|
||||
usage = getattr(response, "usage", None)
|
||||
if usage is None:
|
||||
|
||||
@@ -143,7 +143,18 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
|
||||
if is_xai_responses and session_id:
|
||||
kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["x-grok-conv-id"] = session_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
+15
-14
@@ -12,7 +12,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -32,10 +32,10 @@ class ToolCall:
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
id: Optional[str]
|
||||
id: str | None
|
||||
name: str
|
||||
arguments: str # JSON string
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The agent loop reads tc.function.name / tc.function.arguments
|
||||
@@ -47,17 +47,17 @@ class ToolCall:
|
||||
return "function"
|
||||
|
||||
@property
|
||||
def function(self) -> "ToolCall":
|
||||
def function(self) -> ToolCall:
|
||||
"""Return self so tc.function.name / tc.function.arguments work."""
|
||||
return self
|
||||
|
||||
@property
|
||||
def call_id(self) -> Optional[str]:
|
||||
def call_id(self) -> str | None:
|
||||
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
|
||||
return (self.provider_data or {}).get("call_id")
|
||||
|
||||
@property
|
||||
def response_item_id(self) -> Optional[str]:
|
||||
def response_item_id(self) -> str | None:
|
||||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@@ -101,18 +101,18 @@ class NormalizedResponse:
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
content: Optional[str]
|
||||
tool_calls: Optional[List[ToolCall]]
|
||||
content: str | None
|
||||
tool_calls: list[ToolCall] | None
|
||||
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
|
||||
reasoning: Optional[str] = None
|
||||
usage: Optional[Usage] = None
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
reasoning: str | None = None
|
||||
usage: Usage | None = None
|
||||
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The shim _nr_to_assistant_message() mapped these from provider_data.
|
||||
# These properties let NormalizedResponse pass through directly.
|
||||
@property
|
||||
def reasoning_content(self) -> Optional[str]:
|
||||
def reasoning_content(self) -> str | None:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_content")
|
||||
|
||||
@@ -136,8 +136,9 @@ class NormalizedResponse:
|
||||
# Factory helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_tool_call(
|
||||
id: Optional[str],
|
||||
id: str | None,
|
||||
name: str,
|
||||
arguments: Any,
|
||||
**provider_fields: Any,
|
||||
@@ -151,7 +152,7 @@ def build_tool_call(
|
||||
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
|
||||
|
||||
|
||||
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
|
||||
def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
|
||||
"""Translate a provider-specific stop reason to the normalised set.
|
||||
|
||||
Falls back to ``"stop"`` for unknown or ``None`` reasons.
|
||||
|
||||
@@ -121,6 +121,18 @@ model:
|
||||
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
|
||||
# # data_collection: "deny"
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Response Caching (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
# Cache identical API responses at the OpenRouter edge for free instant replays.
|
||||
# When enabled, identical requests (same model, messages, parameters) return
|
||||
# cached responses with zero billing. Separate from Anthropic prompt caching.
|
||||
# See: https://openrouter.ai/docs/guides/features/response-caching
|
||||
#
|
||||
# openrouter:
|
||||
# response_cache: true # Enable response caching (default: true)
|
||||
# response_cache_ttl: 300 # Cache TTL in seconds, 1-86400 (default: 300)
|
||||
|
||||
# =============================================================================
|
||||
# Git Worktree Isolation
|
||||
# =============================================================================
|
||||
@@ -289,6 +301,25 @@ browser:
|
||||
# after this period of no activity between agent loops (default: 120 = 2 minutes)
|
||||
inactivity_timeout: 120
|
||||
|
||||
# =============================================================================
|
||||
# Tool Loop Guardrails
|
||||
# =============================================================================
|
||||
# Soft warnings are enabled by default. They append guidance to repeated failed
|
||||
# or non-progressing tool results but still let the tool execute. Hard stops are
|
||||
# opt-in circuit breakers for autonomous/cron sessions where stopping a loop is
|
||||
# preferable to spending the full iteration budget.
|
||||
tool_loop_guardrails:
|
||||
warnings_enabled: true
|
||||
hard_stop_enabled: false
|
||||
warn_after:
|
||||
exact_failure: 2
|
||||
same_tool_failure: 3
|
||||
idempotent_no_progress: 2
|
||||
hard_stop_after:
|
||||
exact_failure: 5
|
||||
same_tool_failure: 8
|
||||
idempotent_no_progress: 5
|
||||
|
||||
# =============================================================================
|
||||
# Context Compression (Auto-shrinks long conversations)
|
||||
# =============================================================================
|
||||
|
||||
+174
-8
@@ -420,7 +420,7 @@ def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
|
||||
|
||||
|
||||
def create_job(
|
||||
prompt: str,
|
||||
prompt: Optional[str],
|
||||
schedule: str,
|
||||
name: Optional[str] = None,
|
||||
repeat: Optional[int] = None,
|
||||
@@ -435,12 +435,14 @@ def create_job(
|
||||
context_from: Optional[Union[str, List[str]]] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
no_agent: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to run (must be self-contained, or a task instruction when skill is set)
|
||||
prompt: The prompt to run (must be self-contained, or a task instruction when skill is set).
|
||||
Ignored when ``no_agent=True`` except as an optional name hint.
|
||||
schedule: Schedule string (see parse_schedule)
|
||||
name: Optional friendly name
|
||||
repeat: How many times to run (None = forever, 1 = once)
|
||||
@@ -451,21 +453,33 @@ def create_job(
|
||||
model: Optional per-job model override
|
||||
provider: Optional per-job provider override
|
||||
base_url: Optional per-job base URL override
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
script: Optional path to a script whose stdout feeds the job. With
|
||||
``no_agent=True`` the script IS the job — its stdout is
|
||||
delivered verbatim. Without ``no_agent``, its stdout is
|
||||
injected into the agent's prompt as context (data-collection /
|
||||
change-detection pattern). Paths resolve under
|
||||
~/.hermes/scripts/; ``.sh`` / ``.bash`` files run via bash,
|
||||
anything else via Python.
|
||||
context_from: Optional job ID (or list of job IDs) whose most recent output
|
||||
is injected into the prompt as context before each run.
|
||||
Useful for chaining cron jobs: job A finds data, job B processes it.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
Ignored when ``no_agent=True``.
|
||||
workdir: Optional absolute path. When set, the job runs as if launched
|
||||
from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
|
||||
that directory are injected into the system prompt, and the
|
||||
terminal/file/code_exec tools use it as their working directory
|
||||
(via TERMINAL_CWD). When unset, the old behaviour is preserved
|
||||
(no context files injected, tools use the scheduler's cwd).
|
||||
With ``no_agent=True``, ``workdir`` is still applied as the
|
||||
script's cwd so relative paths inside the script behave
|
||||
predictably.
|
||||
no_agent: When True, skip the agent entirely — run ``script`` on schedule
|
||||
and deliver its stdout directly. Empty stdout = silent (no
|
||||
delivery). Requires ``script`` to be set. Ideal for classic
|
||||
watchdogs and periodic alerts that don't need LLM reasoning.
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
@@ -499,6 +513,16 @@ def create_job(
|
||||
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
normalized_no_agent = bool(no_agent)
|
||||
|
||||
# no_agent jobs are meaningless without a script — the script IS the job.
|
||||
# Surface this as a clear ValueError at create time so bad configs never
|
||||
# reach the scheduler.
|
||||
if normalized_no_agent and not normalized_script:
|
||||
raise ValueError(
|
||||
"no_agent=True requires a script — with no agent and no script "
|
||||
"there is nothing for the job to run."
|
||||
)
|
||||
|
||||
# Normalize context_from: accept str or list of str, store as list or None
|
||||
if isinstance(context_from, str):
|
||||
@@ -508,7 +532,7 @@ def create_job(
|
||||
else:
|
||||
context_from = None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None) or (normalized_script if normalized_no_agent else None)) or "cron job"
|
||||
job = {
|
||||
"id": job_id,
|
||||
"name": name or label_source[:50].strip(),
|
||||
@@ -519,6 +543,7 @@ def create_job(
|
||||
"provider": normalized_provider,
|
||||
"base_url": normalized_base_url,
|
||||
"script": normalized_script,
|
||||
"no_agent": normalized_no_agent,
|
||||
"context_from": context_from,
|
||||
"schedule": parsed_schedule,
|
||||
"schedule_display": parsed_schedule.get("display", schedule),
|
||||
@@ -785,6 +810,12 @@ def get_due_jobs() -> List[Dict[str, Any]]:
|
||||
the job is fast-forwarded to the next future run instead of firing
|
||||
immediately. This prevents a burst of missed jobs on gateway restart.
|
||||
"""
|
||||
with _jobs_file_lock:
|
||||
return _get_due_jobs_locked()
|
||||
|
||||
|
||||
def _get_due_jobs_locked() -> List[Dict[str, Any]]:
|
||||
"""Inner implementation of get_due_jobs(); must be called with _jobs_file_lock held."""
|
||||
now = _hermes_now()
|
||||
raw_jobs = load_jobs()
|
||||
jobs = [_apply_skill_fields(j) for j in copy.deepcopy(raw_jobs)]
|
||||
@@ -797,19 +828,36 @@ def get_due_jobs() -> List[Dict[str, Any]]:
|
||||
|
||||
next_run = job.get("next_run_at")
|
||||
if not next_run:
|
||||
schedule = job.get("schedule", {})
|
||||
kind = schedule.get("kind")
|
||||
|
||||
# One-shot jobs use a small grace window via the dedicated helper.
|
||||
recovered_next = _recoverable_oneshot_run_at(
|
||||
job.get("schedule", {}),
|
||||
schedule,
|
||||
now,
|
||||
last_run_at=job.get("last_run_at"),
|
||||
)
|
||||
recovery_kind = "one-shot" if recovered_next else None
|
||||
|
||||
# Recurring jobs reach here only when something — typically a
|
||||
# direct jobs.json edit that bypassed add_job() — left
|
||||
# next_run_at unset. Without this branch, such jobs are
|
||||
# silently skipped forever; recompute next_run_at from the
|
||||
# schedule so they pick up at their next scheduled tick.
|
||||
if not recovered_next and kind in ("cron", "interval"):
|
||||
recovered_next = compute_next_run(schedule, now.isoformat())
|
||||
if recovered_next:
|
||||
recovery_kind = kind
|
||||
|
||||
if not recovered_next:
|
||||
continue
|
||||
|
||||
job["next_run_at"] = recovered_next
|
||||
next_run = recovered_next
|
||||
logger.info(
|
||||
"Job '%s' had no next_run_at; recovering one-shot run at %s",
|
||||
"Job '%s' had no next_run_at; recovering %s run at %s",
|
||||
job.get("name", job["id"]),
|
||||
recovery_kind,
|
||||
recovered_next,
|
||||
)
|
||||
for rj in raw_jobs:
|
||||
@@ -882,3 +930,121 @@ def save_job_output(job_id: str, output: str):
|
||||
raise
|
||||
|
||||
return output_file
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Skill reference rewriting (curator integration)
|
||||
# =============================================================================
|
||||
|
||||
def rewrite_skill_refs(
|
||||
consolidated: Optional[Dict[str, str]] = None,
|
||||
pruned: Optional[List[str]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Rewrite cron job skill references after a curator consolidation pass.
|
||||
|
||||
When the curator consolidates a skill X into umbrella Y (or archives X
|
||||
as pruned), any cron job that lists ``X`` in its ``skills`` field will
|
||||
fail to load ``X`` at run time — the scheduler logs a warning and
|
||||
skips the skill, so the job runs without the instructions it was
|
||||
scheduled to follow. See cron/scheduler.py where ``skill_view`` is
|
||||
called per skill name.
|
||||
|
||||
This function repairs cron jobs in-place:
|
||||
|
||||
- A skill listed in ``consolidated`` is replaced with its umbrella
|
||||
target (the ``into`` value). If the umbrella is already in the
|
||||
job's skill list, the stale name is dropped without duplication.
|
||||
- A skill listed in ``pruned`` is dropped outright — there is no
|
||||
forwarding target.
|
||||
- Ordering and other skills in the list are preserved.
|
||||
- The legacy ``skill`` field is realigned via ``_apply_skill_fields``.
|
||||
|
||||
Args:
|
||||
consolidated: mapping of ``old_skill_name -> umbrella_skill_name``.
|
||||
pruned: list of skill names that were archived with no forwarding
|
||||
target.
|
||||
|
||||
Returns a report dict::
|
||||
|
||||
{
|
||||
"rewrites": [
|
||||
{
|
||||
"job_id": ...,
|
||||
"job_name": ...,
|
||||
"before": [...],
|
||||
"after": [...],
|
||||
"mapped": {"old": "new", ...},
|
||||
"dropped": ["old", ...],
|
||||
},
|
||||
...
|
||||
],
|
||||
"jobs_updated": N,
|
||||
"jobs_scanned": M,
|
||||
}
|
||||
|
||||
Best-effort: exceptions from loading/saving propagate to the caller so
|
||||
tests can assert behaviour; the curator invocation site wraps this
|
||||
call in a try/except so a failure here never breaks the curator.
|
||||
"""
|
||||
consolidated = dict(consolidated or {})
|
||||
pruned_set = set(pruned or [])
|
||||
# A skill listed in both wins as "consolidated" — it has a target,
|
||||
# which is the more useful of the two outcomes.
|
||||
pruned_set -= set(consolidated.keys())
|
||||
|
||||
if not consolidated and not pruned_set:
|
||||
return {"rewrites": [], "jobs_updated": 0, "jobs_scanned": 0}
|
||||
|
||||
with _jobs_file_lock:
|
||||
jobs = load_jobs()
|
||||
rewrites: List[Dict[str, Any]] = []
|
||||
changed = False
|
||||
|
||||
for job in jobs:
|
||||
skills_before = _normalize_skill_list(job.get("skill"), job.get("skills"))
|
||||
if not skills_before:
|
||||
continue
|
||||
|
||||
mapped: Dict[str, str] = {}
|
||||
dropped: List[str] = []
|
||||
new_skills: List[str] = []
|
||||
|
||||
for name in skills_before:
|
||||
if name in consolidated:
|
||||
target = consolidated[name]
|
||||
mapped[name] = target
|
||||
if target and target not in new_skills:
|
||||
new_skills.append(target)
|
||||
elif name in pruned_set:
|
||||
dropped.append(name)
|
||||
else:
|
||||
if name not in new_skills:
|
||||
new_skills.append(name)
|
||||
|
||||
if not mapped and not dropped:
|
||||
continue
|
||||
|
||||
job["skills"] = new_skills
|
||||
job["skill"] = new_skills[0] if new_skills else None
|
||||
changed = True
|
||||
|
||||
rewrites.append({
|
||||
"job_id": job.get("id"),
|
||||
"job_name": job.get("name") or job.get("id"),
|
||||
"before": list(skills_before),
|
||||
"after": list(new_skills),
|
||||
"mapped": mapped,
|
||||
"dropped": dropped,
|
||||
})
|
||||
|
||||
if changed:
|
||||
save_jobs(jobs)
|
||||
logger.info(
|
||||
"Curator rewrote skill references in %d cron job(s)", len(rewrites)
|
||||
)
|
||||
|
||||
return {
|
||||
"rewrites": rewrites,
|
||||
"jobs_updated": len(rewrites),
|
||||
"jobs_scanned": len(jobs),
|
||||
}
|
||||
|
||||
+204
-29
@@ -35,7 +35,7 @@ from typing import List, Optional
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.config import load_config, _expand_env_vars
|
||||
from hermes_time import now as _hermes_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -114,18 +114,36 @@ from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_
|
||||
# locally for audit.
|
||||
SILENT_MARKER = "[SILENT]"
|
||||
|
||||
# Resolve Hermes home directory (respects HERMES_HOME override)
|
||||
_hermes_home = get_hermes_home()
|
||||
# Backward-compatible module override used by tests and emergency monkeypatches.
|
||||
_hermes_home: Path | None = None
|
||||
|
||||
# File-based lock prevents concurrent ticks from gateway + daemon + systemd timer
|
||||
_LOCK_DIR = _hermes_home / "cron"
|
||||
_LOCK_FILE = _LOCK_DIR / ".tick.lock"
|
||||
|
||||
def _get_hermes_home() -> Path:
|
||||
"""Resolve Hermes home dynamically while preserving test monkeypatch hooks."""
|
||||
return _hermes_home or get_hermes_home()
|
||||
|
||||
|
||||
def _get_lock_paths() -> tuple[Path, Path]:
|
||||
"""Resolve cron lock paths at call time so profile/env changes are honored."""
|
||||
hermes_home = _get_hermes_home()
|
||||
lock_dir = hermes_home / "cron"
|
||||
return lock_dir, lock_dir / ".tick.lock"
|
||||
|
||||
|
||||
def _resolve_origin(job: dict) -> Optional[dict]:
|
||||
"""Extract origin info from a job, preserving any extra routing metadata."""
|
||||
"""Extract origin info from a job, preserving any extra routing metadata.
|
||||
|
||||
Treats non-dict origins (free-form provenance strings, ints, lists from
|
||||
migration scripts or hand-edited jobs.json) as missing instead of
|
||||
crashing with ``AttributeError`` on ``origin.get(...)``. Without this
|
||||
guard, a job tagged with e.g. ``"combined-digest-replaces-x-and-y"``
|
||||
crashed every fire attempt with
|
||||
``'str' object has no attribute 'get'`` — ``mark_job_run`` recorded the
|
||||
failure, but the next tick re-loaded the same poisoned origin and
|
||||
crashed identically until the field was patched manually (#18722).
|
||||
"""
|
||||
origin = job.get("origin")
|
||||
if not origin:
|
||||
if not isinstance(origin, dict):
|
||||
return None
|
||||
platform = origin.get("platform")
|
||||
chat_id = origin.get("chat_id")
|
||||
@@ -147,6 +165,19 @@ def _get_home_target_chat_id(platform_name: str) -> str:
|
||||
return value
|
||||
|
||||
|
||||
def _get_home_target_thread_id(platform_name: str) -> Optional[str]:
|
||||
"""Return the optional thread/topic ID for a platform home target."""
|
||||
env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
|
||||
if not env_var:
|
||||
return None
|
||||
value = os.getenv(f"{env_var}_THREAD_ID", "").strip()
|
||||
if not value:
|
||||
legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var)
|
||||
if legacy:
|
||||
value = os.getenv(f"{legacy}_THREAD_ID", "").strip()
|
||||
return value or None
|
||||
|
||||
|
||||
def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
|
||||
"""Resolve one concrete auto-delivery target for a cron job."""
|
||||
|
||||
@@ -175,7 +206,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
||||
return {
|
||||
"platform": platform_name,
|
||||
"chat_id": chat_id,
|
||||
"thread_id": None,
|
||||
"thread_id": _get_home_target_thread_id(platform_name),
|
||||
}
|
||||
return None
|
||||
|
||||
@@ -229,7 +260,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
||||
return {
|
||||
"platform": platform_name,
|
||||
"chat_id": chat_id,
|
||||
"thread_id": None,
|
||||
"thread_id": _get_home_target_thread_id(platform_name),
|
||||
}
|
||||
|
||||
|
||||
@@ -394,7 +425,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
thread_id = target.get("thread_id")
|
||||
|
||||
# Diagnostic: log thread_id for topic-aware delivery debugging
|
||||
origin = job.get("origin") or {}
|
||||
origin = _resolve_origin(job) or {}
|
||||
origin_thread = origin.get("thread_id")
|
||||
if origin_thread and not thread_id:
|
||||
logger.warning(
|
||||
@@ -553,8 +584,18 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
prevent arbitrary script execution via path traversal or absolute
|
||||
path injection.
|
||||
|
||||
Supported interpreters (chosen by file extension):
|
||||
|
||||
* ``.sh`` / ``.bash`` — run with ``/bin/bash``
|
||||
* anything else — run with the current Python interpreter
|
||||
(``sys.executable``), preserving the original behaviour for
|
||||
Python-based pre-check and data-collection scripts.
|
||||
|
||||
Shell support lets ``no_agent=True`` jobs ship classic bash watchdogs
|
||||
(the `memory-watchdog.sh` pattern) without wrapping them in Python.
|
||||
|
||||
Args:
|
||||
script_path: Path to a Python script. Relative paths are resolved
|
||||
script_path: Path to the script. Relative paths are resolved
|
||||
against HERMES_HOME/scripts/. Absolute and ~-prefixed paths
|
||||
are also validated to ensure they stay within the scripts dir.
|
||||
|
||||
@@ -564,7 +605,7 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
"""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
scripts_dir = get_hermes_home() / "scripts"
|
||||
scripts_dir = _get_hermes_home() / "scripts"
|
||||
scripts_dir.mkdir(parents=True, exist_ok=True)
|
||||
scripts_dir_resolved = scripts_dir.resolve()
|
||||
|
||||
@@ -591,9 +632,19 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
|
||||
script_timeout = _get_script_timeout()
|
||||
|
||||
# Pick an interpreter by extension. Bash for .sh/.bash, Python for
|
||||
# everything else. We deliberately do NOT honour the file's own
|
||||
# shebang: the scripts dir is trusted, but keeping the interpreter
|
||||
# choice explicit here keeps the allowed surface small and auditable.
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in (".sh", ".bash"):
|
||||
argv = ["/bin/bash", str(path)]
|
||||
else:
|
||||
argv = [sys.executable, str(path)]
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(path)],
|
||||
argv,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=script_timeout,
|
||||
@@ -683,10 +734,8 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
f"{prompt}"
|
||||
)
|
||||
else:
|
||||
prompt = (
|
||||
"[Script ran successfully but produced no output.]\n\n"
|
||||
f"{prompt}"
|
||||
)
|
||||
# Script produced no output — nothing to report, skip AI call.
|
||||
return None
|
||||
else:
|
||||
prompt = (
|
||||
"## Script Error\n"
|
||||
@@ -759,6 +808,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
return prompt
|
||||
|
||||
from tools.skills_tool import skill_view
|
||||
from tools.skill_usage import bump_use
|
||||
|
||||
parts = []
|
||||
skipped: list[str] = []
|
||||
@@ -770,6 +820,12 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
skipped.append(skill_name)
|
||||
continue
|
||||
|
||||
# Bump usage so the curator sees this skill as actively used.
|
||||
try:
|
||||
bump_use(skill_name)
|
||||
except Exception:
|
||||
logger.debug("Cron job: failed to bump skill usage for '%s'", skill_name, exc_info=True)
|
||||
|
||||
content = str(loaded.get("content") or "").strip()
|
||||
if parts:
|
||||
parts.append("")
|
||||
@@ -802,8 +858,120 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
Returns:
|
||||
Tuple of (success, full_output_doc, final_response, error_message)
|
||||
"""
|
||||
job_id = job["id"]
|
||||
job_name = job["name"]
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# no_agent short-circuit — the script IS the job, no LLM involvement.
|
||||
# ---------------------------------------------------------------
|
||||
# This mirrors the classic "run a bash script on a timer, send its
|
||||
# stdout to telegram" watchdog pattern. The agent path is skipped
|
||||
# entirely: no AIAgent, no prompt, no tool loop, no token spend.
|
||||
#
|
||||
# We check this BEFORE importing run_agent / constructing SessionDB so
|
||||
# a pure-script tick never pays for the agent machinery it isn't going
|
||||
# to use. Keep this block self-contained.
|
||||
#
|
||||
# Semantics:
|
||||
# - script stdout (trimmed) → delivered verbatim as the final message
|
||||
# - empty stdout → silent run (no delivery, success=True)
|
||||
# - non-zero exit / timeout → delivered as an error alert, success=False
|
||||
# - wakeAgent=false gate → treated like empty stdout (silent), since
|
||||
# the whole point of no_agent is that there
|
||||
# is no agent to wake
|
||||
if job.get("no_agent"):
|
||||
script_path = job.get("script")
|
||||
if not script_path:
|
||||
err = "no_agent=True but no script is set for this job"
|
||||
logger.error("Job '%s': %s", job_id, err)
|
||||
return False, "", "", err
|
||||
|
||||
# Apply workdir if configured — lets scripts use predictable relative
|
||||
# paths. For no_agent jobs this is just the subprocess cwd (not an
|
||||
# agent TERMINAL_CWD bridge).
|
||||
_job_workdir = (job.get("workdir") or "").strip() or None
|
||||
_prior_cwd = None
|
||||
if _job_workdir and Path(_job_workdir).is_dir():
|
||||
_prior_cwd = os.getcwd()
|
||||
try:
|
||||
os.chdir(_job_workdir)
|
||||
except OSError:
|
||||
_prior_cwd = None
|
||||
|
||||
try:
|
||||
ok, output = _run_job_script(script_path)
|
||||
finally:
|
||||
if _prior_cwd is not None:
|
||||
try:
|
||||
os.chdir(_prior_cwd)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
now_iso = _hermes_now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
if not ok:
|
||||
# Script crashed / timed out / exited non-zero. Deliver the
|
||||
# error so the user knows the watchdog itself broke — silent
|
||||
# failure for an alerting job is the worst-case outcome.
|
||||
alert = (
|
||||
f"⚠ Cron watchdog '{job_name}' script failed\n\n"
|
||||
f"{output}\n\n"
|
||||
f"Time: {now_iso}"
|
||||
)
|
||||
doc = (
|
||||
f"# Cron Job: {job_name}\n\n"
|
||||
f"**Job ID:** {job_id}\n"
|
||||
f"**Run Time:** {now_iso}\n"
|
||||
f"**Mode:** no_agent (script)\n"
|
||||
f"**Status:** script failed\n\n"
|
||||
f"{output}\n"
|
||||
)
|
||||
return False, doc, alert, output
|
||||
|
||||
# Honour the wakeAgent gate as a silent signal — `wakeAgent: false`
|
||||
# means "nothing to report this tick", same as empty stdout.
|
||||
if not _parse_wake_gate(output):
|
||||
logger.info(
|
||||
"Job '%s' (no_agent): wakeAgent=false gate — silent run", job_id
|
||||
)
|
||||
silent_doc = (
|
||||
f"# Cron Job: {job_name}\n\n"
|
||||
f"**Job ID:** {job_id}\n"
|
||||
f"**Run Time:** {now_iso}\n"
|
||||
f"**Mode:** no_agent (script)\n"
|
||||
f"**Status:** silent (wakeAgent=false)\n"
|
||||
)
|
||||
return True, silent_doc, SILENT_MARKER, None
|
||||
|
||||
if not output.strip():
|
||||
logger.info("Job '%s' (no_agent): empty stdout — silent run", job_id)
|
||||
silent_doc = (
|
||||
f"# Cron Job: {job_name}\n\n"
|
||||
f"**Job ID:** {job_id}\n"
|
||||
f"**Run Time:** {now_iso}\n"
|
||||
f"**Mode:** no_agent (script)\n"
|
||||
f"**Status:** silent (empty output)\n"
|
||||
)
|
||||
return True, silent_doc, SILENT_MARKER, None
|
||||
|
||||
doc = (
|
||||
f"# Cron Job: {job_name}\n\n"
|
||||
f"**Job ID:** {job_id}\n"
|
||||
f"**Run Time:** {now_iso}\n"
|
||||
f"**Mode:** no_agent (script)\n\n"
|
||||
f"---\n\n"
|
||||
f"{output}\n"
|
||||
)
|
||||
return True, doc, output, None
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Default (LLM) path — import and construct the agent machinery now
|
||||
# that we know we actually need it. Doing these imports here instead of
|
||||
# at module top keeps no_agent ticks from paying for AIAgent / SessionDB
|
||||
# construction costs.
|
||||
# ---------------------------------------------------------------
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
# Initialize SQLite session store so cron job messages are persisted
|
||||
# and discoverable via session_search (same pattern as gateway/run.py).
|
||||
_session_db = None
|
||||
@@ -812,9 +980,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
_session_db = SessionDB()
|
||||
except Exception as e:
|
||||
logger.debug("Job '%s': SQLite session store not available: %s", job.get("id", "?"), e)
|
||||
|
||||
job_id = job["id"]
|
||||
job_name = job["name"]
|
||||
|
||||
# Wake-gate: if this job has a pre-check script, run it BEFORE building
|
||||
# the prompt so a ``{"wakeAgent": false}`` response can short-circuit
|
||||
@@ -839,6 +1004,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
return True, silent_doc, SILENT_MARKER, None
|
||||
|
||||
prompt = _build_job_prompt(job, prerun_script=prerun_script)
|
||||
if prompt is None:
|
||||
logger.info("Job '%s': script produced no output, skipping AI call.", job_name)
|
||||
return True, "", SILENT_MARKER, None
|
||||
origin = _resolve_origin(job)
|
||||
_cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
|
||||
|
||||
@@ -898,9 +1066,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
# changes take effect without a gateway restart.
|
||||
from dotenv import load_dotenv
|
||||
try:
|
||||
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="utf-8")
|
||||
load_dotenv(str(_get_hermes_home() / ".env"), override=True, encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
|
||||
load_dotenv(str(_get_hermes_home() / ".env"), override=True, encoding="latin-1")
|
||||
|
||||
delivery_target = _resolve_delivery_target(job)
|
||||
if delivery_target:
|
||||
@@ -918,10 +1086,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
_cfg = {}
|
||||
try:
|
||||
import yaml
|
||||
_cfg_path = str(_hermes_home / "config.yaml")
|
||||
_cfg_path = str(_get_hermes_home() / "config.yaml")
|
||||
if os.path.exists(_cfg_path):
|
||||
with open(_cfg_path) as _f:
|
||||
_cfg = yaml.safe_load(_f) or {}
|
||||
_cfg = _expand_env_vars(_cfg)
|
||||
_model_cfg = _cfg.get("model", {})
|
||||
if not job.get("model"):
|
||||
if isinstance(_model_cfg, str):
|
||||
@@ -951,7 +1120,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
if prefill_file:
|
||||
pfpath = Path(prefill_file).expanduser()
|
||||
if not pfpath.is_absolute():
|
||||
pfpath = _hermes_home / pfpath
|
||||
pfpath = _get_hermes_home() / pfpath
|
||||
if pfpath.exists():
|
||||
try:
|
||||
with open(pfpath, "r", encoding="utf-8") as _pf:
|
||||
@@ -974,8 +1143,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
)
|
||||
from hermes_cli.auth import AuthError
|
||||
try:
|
||||
# Do not inject HERMES_INFERENCE_PROVIDER here. resolve_runtime_provider()
|
||||
# already prefers persisted config over stale shell/env overrides when
|
||||
# no explicit provider is requested. Passing the env var here short-
|
||||
# circuits that precedence and can resurrect old providers (for
|
||||
# example DeepSeek) for cron jobs that do not pin provider/model.
|
||||
runtime_kwargs = {
|
||||
"requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
"requested": job.get("provider"),
|
||||
}
|
||||
if job.get("base_url"):
|
||||
runtime_kwargs["explicit_base_url"] = job.get("base_url")
|
||||
@@ -1270,12 +1444,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
Returns:
|
||||
Number of jobs executed (0 if another tick is already running)
|
||||
"""
|
||||
_LOCK_DIR.mkdir(parents=True, exist_ok=True)
|
||||
lock_dir, lock_file = _get_lock_paths()
|
||||
lock_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Cross-platform file locking: fcntl on Unix, msvcrt on Windows
|
||||
lock_fd = None
|
||||
try:
|
||||
lock_fd = open(_LOCK_FILE, "w")
|
||||
lock_fd = open(lock_file, "w")
|
||||
if fcntl:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
elif msvcrt:
|
||||
|
||||
+1
-1
@@ -40,7 +40,7 @@ services:
|
||||
# - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
|
||||
# - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
|
||||
# - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
|
||||
# - TEAMS_PORT=3978
|
||||
# - TEAMS_PORT=${TEAMS_PORT:-3978}
|
||||
command: ["gateway", "run"]
|
||||
|
||||
dashboard:
|
||||
|
||||
@@ -86,6 +86,41 @@ if [ -d "$INSTALL_DIR/skills" ]; then
|
||||
python3 "$INSTALL_DIR/tools/skills_sync.py"
|
||||
fi
|
||||
|
||||
# Optionally start `hermes dashboard` as a side-process.
|
||||
#
|
||||
# Toggled by HERMES_DASHBOARD=1 (also accepts "true"/"yes", case-insensitive).
|
||||
# Host/port/TUI can be overridden via:
|
||||
# HERMES_DASHBOARD_HOST (default 0.0.0.0 — exposed outside the container)
|
||||
# HERMES_DASHBOARD_PORT (default 9119, matches `hermes dashboard` default)
|
||||
# HERMES_DASHBOARD_TUI (already honored by `hermes dashboard` itself)
|
||||
#
|
||||
# The dashboard is a long-lived server. We background it *before* the final
|
||||
# `exec hermes "$@"` so the user's chosen foreground command (chat, gateway,
|
||||
# sleep infinity, …) remains PID-of-interest for the container runtime. When
|
||||
# the container stops the whole process tree is torn down, so no explicit
|
||||
# cleanup is needed.
|
||||
case "${HERMES_DASHBOARD:-}" in
|
||||
1|true|TRUE|True|yes|YES|Yes)
|
||||
dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
|
||||
dash_port="${HERMES_DASHBOARD_PORT:-9119}"
|
||||
dash_args=(--host "$dash_host" --port "$dash_port" --no-open)
|
||||
# Binding to anything other than localhost requires --insecure — the
|
||||
# dashboard refuses otherwise because it exposes API keys. Inside a
|
||||
# container this is the expected deployment (host reaches it via
|
||||
# published port), so opt in automatically.
|
||||
if [ "$dash_host" != "127.0.0.1" ] && [ "$dash_host" != "localhost" ]; then
|
||||
dash_args+=(--insecure)
|
||||
fi
|
||||
echo "Starting hermes dashboard on ${dash_host}:${dash_port} (background)"
|
||||
# Prefix dashboard output so it's distinguishable from the main
|
||||
# process in `docker logs`. stdbuf keeps the pipe line-buffered.
|
||||
(
|
||||
stdbuf -oL -eL hermes dashboard "${dash_args[@]}" 2>&1 \
|
||||
| sed -u 's/^/[dashboard] /'
|
||||
) &
|
||||
;;
|
||||
esac
|
||||
|
||||
# Final exec: two supported invocation patterns.
|
||||
#
|
||||
# docker run <image> -> exec `hermes` with no args (legacy default)
|
||||
|
||||
Binary file not shown.
@@ -0,0 +1,473 @@
|
||||
# Telegram DM User-Managed Multi-Session Topics Implementation Plan
|
||||
|
||||
> **For Hermes:** Use test-driven-development for implementation. Use subagent-driven-development only after this plan is split into small reviewed tasks.
|
||||
|
||||
**Goal:** Add an opt-in Telegram DM multi-session mode where Telegram user-created private-chat topics become independent Hermes session lanes, while the root DM becomes a system lobby.
|
||||
|
||||
**Architecture:** Rely on Telegram's native private-chat topic UI. Users create new topics with the `+` button; Hermes maps each `message_thread_id` to a separate session lane. Hermes does not create topics for normal `/new` flow and does not try to manage topic lifecycle beyond activation/status, root-lobby behavior, and restoring legacy sessions into a user-created topic.
|
||||
|
||||
**Tech Stack:** Hermes gateway, Telegram Bot API 9.4+, python-telegram-bot adapter, SQLite SessionDB / side tables, pytest.
|
||||
|
||||
---
|
||||
|
||||
## 1. Product decisions
|
||||
|
||||
### Accepted
|
||||
|
||||
- PR-quality implementation: migrations, tests, docs, backwards compatibility.
|
||||
- Use SQLite persistence, not JSON sidecars.
|
||||
- Live status suffixes in topic titles are out of MVP.
|
||||
- Topic title sync/editing is out of MVP except future-compatible storage if cheap.
|
||||
- User creates Telegram topics manually through the Telegram bot interface.
|
||||
- `/new` does **not** create Telegram topics.
|
||||
- Root/main DM becomes a system lobby after activation.
|
||||
- Existing Telegram behavior remains unchanged until the feature is activated/enabled.
|
||||
- Migration of old sessions is supported through `/topic` listing and `/topic <session_id>` restore inside a user-created topic.
|
||||
|
||||
### Telegram API assumptions verified from Bot API docs
|
||||
|
||||
- `getMe` returns bot `User` fields:
|
||||
- `has_topics_enabled`: forum/topic mode enabled in private chats.
|
||||
- `allows_users_to_create_topics`: users may create/delete topics in private chats.
|
||||
- `createForumTopic` works for private chats with a user, but MVP does not rely on it for normal flow.
|
||||
- `Message.message_thread_id` identifies a topic in private chats.
|
||||
- `sendMessage` supports `message_thread_id` for private-chat topics.
|
||||
- `pinChatMessage` is allowed in private chats.
|
||||
|
||||
---
|
||||
|
||||
## 2. Target UX
|
||||
|
||||
### 2.1 Activation from root/main DM
|
||||
|
||||
User sends:
|
||||
|
||||
```text
|
||||
/topic
|
||||
```
|
||||
|
||||
Hermes:
|
||||
|
||||
1. calls Telegram `getMe`;
|
||||
2. verifies `has_topics_enabled` and `allows_users_to_create_topics`;
|
||||
3. enables multi-session topic mode for this Telegram DM user/chat;
|
||||
4. sends an onboarding message;
|
||||
5. pins the onboarding message if configured;
|
||||
6. shows old/unlinked sessions that can be restored into topics.
|
||||
|
||||
Suggested onboarding text:
|
||||
|
||||
```text
|
||||
Multi-session mode is enabled.
|
||||
|
||||
Create new Hermes chats with the + button in this bot interface. Each Telegram topic is an independent Hermes session, so you can work on different tasks in parallel.
|
||||
|
||||
This main chat is reserved for system commands, status, and session management.
|
||||
|
||||
To restore an old session:
|
||||
1. Use /topic here to see unlinked sessions.
|
||||
2. Create a new topic with the + button.
|
||||
3. Send /topic <session_id> inside that topic.
|
||||
```
|
||||
|
||||
### 2.2 Root/main DM after activation
|
||||
|
||||
Root DM is a system lobby.
|
||||
|
||||
Allowed/system commands include at least:
|
||||
|
||||
- `/topic`
|
||||
- `/status`
|
||||
- `/sessions` if available
|
||||
- `/usage`
|
||||
- `/help`
|
||||
- `/platforms`
|
||||
|
||||
Normal user prompts in root DM do not enter the agent loop. Reply:
|
||||
|
||||
```text
|
||||
This main chat is reserved for system commands.
|
||||
|
||||
To chat with Hermes, create a new topic using the + button in this bot interface. Each topic works as an independent Hermes session.
|
||||
```
|
||||
|
||||
`/new` in root DM does not create a session/topic. Reply:
|
||||
|
||||
```text
|
||||
To start a new parallel Hermes chat, create a new topic with the + button in this bot interface.
|
||||
|
||||
Each topic is an independent Hermes session. Use /new inside a topic only if you want to replace that topic's current session.
|
||||
```
|
||||
|
||||
### 2.3 First message in a user-created topic
|
||||
|
||||
When a user creates a Telegram topic and sends the first message there:
|
||||
|
||||
1. Hermes receives a Telegram DM message with `message_thread_id`.
|
||||
2. Hermes derives the existing thread-aware `session_key` from `(platform=telegram, chat_type=dm, chat_id, thread_id)`.
|
||||
3. If no binding exists, Hermes creates a fresh Hermes session for this topic lane and persists the binding.
|
||||
4. The message runs through the normal agent loop for that lane.
|
||||
|
||||
### 2.4 `/new` inside a non-main topic
|
||||
|
||||
`/new` remains supported but replaces the session attached to the current topic lane.
|
||||
|
||||
Hermes should warn:
|
||||
|
||||
```text
|
||||
Started a new Hermes session in this topic.
|
||||
|
||||
Tip: for parallel work, create a new topic with the + button instead of using /new here. /new replaces the session attached to the current topic.
|
||||
```
|
||||
|
||||
### 2.5 `/topic` in root/main DM after activation
|
||||
|
||||
Shows:
|
||||
|
||||
- mode enabled/disabled;
|
||||
- last capability check result;
|
||||
- whether intro message is pinned if known;
|
||||
- count of known topic bindings;
|
||||
- list of old/unlinked sessions.
|
||||
|
||||
Example:
|
||||
|
||||
```text
|
||||
Telegram multi-session topics are enabled.
|
||||
|
||||
Create new Hermes chats with the + button in this bot interface.
|
||||
|
||||
Unlinked previous sessions:
|
||||
1. 2026-05-01 Research notes — id: abc123
|
||||
2. 2026-04-30 Deploy debugging — id: def456
|
||||
3. Untitled session — id: ghi789
|
||||
|
||||
To restore one:
|
||||
1. Create a new topic with the + button.
|
||||
2. Open that topic.
|
||||
3. Send /topic <id>
|
||||
```
|
||||
|
||||
### 2.6 `/topic` inside a non-main topic
|
||||
|
||||
Without args, show the current topic binding:
|
||||
|
||||
```text
|
||||
This topic is linked to:
|
||||
Session: Research notes
|
||||
ID: abc123
|
||||
|
||||
Use /new to replace this topic with a fresh session.
|
||||
For parallel work, create another topic with the + button.
|
||||
```
|
||||
|
||||
### 2.7 `/topic <session_id>` inside a non-main topic
|
||||
|
||||
Restore an old/unlinked session into the current user-created topic.
|
||||
|
||||
Behavior:
|
||||
|
||||
1. reject if not in Telegram DM topic;
|
||||
2. verify session belongs to the same Telegram user/chat or is a safe legacy root DM session for this user;
|
||||
3. reject if session is already linked to another active topic in MVP;
|
||||
4. `SessionStore.switch_session(current_topic_session_key, target_session_id)`;
|
||||
5. upsert binding with `managed_mode = restored`;
|
||||
6. send two messages into the topic:
|
||||
- session restored confirmation;
|
||||
- last Hermes assistant message if available.
|
||||
|
||||
Example:
|
||||
|
||||
```text
|
||||
Session restored: Research notes
|
||||
|
||||
Last Hermes message:
|
||||
...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Persistence model
|
||||
|
||||
Use SQLite, but topic-mode schema changes are **explicit opt-in migrations**, not automatic startup reconciliation.
|
||||
|
||||
Important rollback-safety rule:
|
||||
|
||||
- upgrading Hermes and starting the gateway must not create Telegram topic-mode tables or columns;
|
||||
- old/default Telegram behavior must keep working on the existing `state.db`;
|
||||
- the first `/topic` activation path calls an idempotent explicit migration, then enables topic mode for that chat;
|
||||
- if activation fails before the migration is needed, the database remains in the pre-topic-mode shape.
|
||||
|
||||
### 3.1 No eager `sessions` table mutation for MVP
|
||||
|
||||
Do **not** add `chat_id`, `chat_type`, `thread_id`, or `session_key` columns to `sessions` as part of ordinary `SessionDB()` startup. The existing declarative `_reconcile_columns()` mechanism would add them eagerly on every process start, which violates the managed-migration requirement.
|
||||
|
||||
For MVP, keep origin/session-lane data in topic-specific side tables created only by the explicit `/topic` migration. Legacy unlinked sessions can be discovered conservatively from existing data (`source = telegram`, `user_id = current Telegram user`) plus absence from topic bindings.
|
||||
|
||||
If future PRs need richer origin metadata for all gateway sessions, introduce it behind a separate explicit migration/command or a compatibility-reviewed schema bump.
|
||||
|
||||
### 3.2 Explicit `/topic` migration API
|
||||
|
||||
Add an idempotent method such as:
|
||||
|
||||
```python
|
||||
def apply_telegram_topic_migration(self) -> None: ...
|
||||
```
|
||||
|
||||
It creates only topic-mode side tables/indexes and records:
|
||||
|
||||
```text
|
||||
state_meta.telegram_dm_topic_schema_version = 1
|
||||
```
|
||||
|
||||
This method is called from `/topic` activation/status paths before reading or writing topic-mode state. It is not called from generic `SessionDB.__init__`, gateway startup, CLI startup, or auto-maintenance.
|
||||
|
||||
### 3.3 `telegram_dm_topic_mode`
|
||||
|
||||
Stores per-user/chat activation state. Created only by `apply_telegram_topic_migration()`.
|
||||
|
||||
Suggested fields:
|
||||
|
||||
- `chat_id` primary key
|
||||
- `user_id`
|
||||
- `enabled`
|
||||
- `activated_at`
|
||||
- `updated_at`
|
||||
- `has_topics_enabled`
|
||||
- `allows_users_to_create_topics`
|
||||
- `capability_checked_at`
|
||||
- `intro_message_id`
|
||||
- `pinned_message_id`
|
||||
|
||||
### 3.4 `telegram_dm_topic_bindings`
|
||||
|
||||
Stores Telegram topic/thread to Hermes session binding. Created only by `apply_telegram_topic_migration()`.
|
||||
|
||||
Suggested fields:
|
||||
|
||||
- `chat_id`
|
||||
- `thread_id`
|
||||
- `user_id`
|
||||
- `session_key`
|
||||
- `session_id`
|
||||
- `managed_mode`
|
||||
- `auto`
|
||||
- `restored`
|
||||
- `new_replaced`
|
||||
- `linked_at`
|
||||
- `updated_at`
|
||||
|
||||
Recommended constraints:
|
||||
|
||||
- primary key `(chat_id, thread_id)`;
|
||||
- unique index on `session_id` for MVP to prevent one session linked to multiple topics;
|
||||
- index `(user_id, chat_id)` for status/listing.
|
||||
|
||||
### 3.5 Unlinked session semantics
|
||||
|
||||
For MVP, a session is unlinked if:
|
||||
|
||||
- `source = telegram`;
|
||||
- `user_id = current Telegram user`;
|
||||
- no row in `telegram_dm_topic_bindings` has `session_id = session_id`.
|
||||
|
||||
This is intentionally conservative until a future explicit migration adds richer cross-platform origin metadata.
|
||||
|
||||
Never dedupe by title.
|
||||
|
||||
---
|
||||
|
||||
## 4. Config
|
||||
|
||||
Suggested config block:
|
||||
|
||||
```yaml
|
||||
platforms:
|
||||
telegram:
|
||||
extra:
|
||||
multisession_topics:
|
||||
enabled: false
|
||||
mode: user_managed_topics
|
||||
root_chat_behavior: system_lobby
|
||||
pin_intro_message: true
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `enabled: false` means existing Telegram behavior is unchanged.
|
||||
- Activation via `/topic` may create per-chat enabled state only if global config permits it.
|
||||
- `root_chat_behavior: system_lobby` is the MVP behavior for activated chats.
|
||||
|
||||
---
|
||||
|
||||
## 5. Command behavior summary
|
||||
|
||||
### `/topic` root/main DM
|
||||
|
||||
- If not activated: capability check, activate, send/pin onboarding, list unlinked sessions.
|
||||
- If activated: show status and unlinked sessions.
|
||||
|
||||
### `/topic` non-main topic
|
||||
|
||||
- Show current binding.
|
||||
|
||||
### `/topic <session_id>` root/main DM
|
||||
|
||||
Reject with instructions:
|
||||
|
||||
```text
|
||||
Create a new topic with the + button, open it, then send /topic <session_id> there to restore this session.
|
||||
```
|
||||
|
||||
### `/topic <session_id>` non-main topic
|
||||
|
||||
Restore that session into this topic if ownership/linking checks pass.
|
||||
|
||||
### `/new` root/main DM when activated
|
||||
|
||||
Reply with instructions to use the `+` button. Do not enter agent loop.
|
||||
|
||||
### `/new` non-main topic
|
||||
|
||||
Create a new session in the current topic lane, persist/update binding, warn that `+` is preferred for parallel work.
|
||||
|
||||
### Normal text root/main DM when activated
|
||||
|
||||
Reply with system-lobby instruction. Do not enter agent loop.
|
||||
|
||||
### Normal text non-main topic
|
||||
|
||||
Normal Hermes agent flow for that topic's session lane.
|
||||
|
||||
---
|
||||
|
||||
## 6. PR breakdown
|
||||
|
||||
### PR 1 — Explicit topic-mode schema migration
|
||||
|
||||
**Goal:** Add rollback-safe SQLite support for Telegram topic mode without mutating `state.db` on ordinary upgrade/startup.
|
||||
|
||||
**Files likely touched:**
|
||||
|
||||
- `hermes_state.py`
|
||||
- tests under `tests/`
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. opening an old/current DB with `SessionDB()` does not create topic-mode tables or `sessions` origin columns;
|
||||
2. calling `apply_telegram_topic_migration()` creates `telegram_dm_topic_mode` and `telegram_dm_topic_bindings` idempotently;
|
||||
3. migration records `state_meta.telegram_dm_topic_schema_version = 1`.
|
||||
|
||||
### PR 2 — Topic mode activation and binding APIs
|
||||
|
||||
**Goal:** Add SQLite persistence for activation and topic bindings.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. enable/check mode row round-trips;
|
||||
2. binding upsert and lookup by `(chat_id, user_id, thread_id)`;
|
||||
3. linked sessions are excluded from unlinked list.
|
||||
|
||||
### PR 3 — `/topic` activation/status command
|
||||
|
||||
**Goal:** Implement root activation/status/listing behavior.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. `/topic` in root checks `getMe` capabilities and records activation;
|
||||
2. capability failure returns readable instructions;
|
||||
3. activated root `/topic` lists unlinked sessions.
|
||||
|
||||
### PR 4 — System lobby behavior
|
||||
|
||||
**Goal:** Prevent root chat from entering agent loop after activation.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. normal text in activated root returns lobby instruction;
|
||||
2. `/new` in activated root returns `+` button instruction;
|
||||
3. non-activated root behavior is unchanged.
|
||||
|
||||
### PR 5 — Auto-bind user-created topics
|
||||
|
||||
**Goal:** First message in non-main topic creates/uses an independent session lane.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. new topic message creates binding with `auto_created`;
|
||||
2. repeated topic message reuses same binding/lane;
|
||||
3. two topics in same DM do not share sessions.
|
||||
|
||||
### PR 6 — Restore legacy sessions into a topic
|
||||
|
||||
**Goal:** Implement `/topic <session_id>` in non-main topics.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. root `/topic <id>` rejects with instructions;
|
||||
2. topic `/topic <id>` switches current topic lane to target session;
|
||||
3. restore rejects sessions from other users/chats;
|
||||
4. restore rejects already-linked sessions;
|
||||
5. restore emits confirmation and last Hermes assistant message.
|
||||
|
||||
### PR 7 — `/new` inside topic updates binding
|
||||
|
||||
**Goal:** Keep existing `/new` semantics but persist topic binding replacement.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. `/new` in topic creates a new session for same topic lane;
|
||||
2. binding updates to `managed_mode = new_replaced`;
|
||||
3. response includes guidance to use `+` for parallel work.
|
||||
|
||||
### PR 8 — Docs and polish
|
||||
|
||||
**Goal:** Document the feature and Telegram setup.
|
||||
|
||||
**Files likely touched:**
|
||||
|
||||
- `website/docs/user-guide/messaging/telegram.md`
|
||||
- maybe `website/docs/user-guide/sessions.md`
|
||||
|
||||
Docs must explain:
|
||||
|
||||
- BotFather/Telegram settings for topic mode and user-created topics;
|
||||
- `/topic` activation;
|
||||
- root system lobby;
|
||||
- using `+` for new parallel chats;
|
||||
- restoring old sessions with `/topic <id>` inside a topic;
|
||||
- limitations.
|
||||
|
||||
---
|
||||
|
||||
## 7. Testing / quality gates
|
||||
|
||||
Run targeted tests after each TDD cycle, then broader tests before completion.
|
||||
|
||||
Suggested commands after inspection confirms test paths:
|
||||
|
||||
```bash
|
||||
python -m pytest tests/test_hermes_state.py -q
|
||||
python -m pytest tests/gateway/ -q
|
||||
python -m pytest tests/ -o 'addopts=' -q
|
||||
```
|
||||
|
||||
Do not ship without verifying disabled-feature backwards compatibility.
|
||||
|
||||
---
|
||||
|
||||
## 8. Definition of done for MVP
|
||||
|
||||
- `/topic` activates/checks Telegram DM multi-session mode.
|
||||
- Root DM becomes a system lobby after activation.
|
||||
- Onboarding message tells users to create new chats with the Telegram `+` button.
|
||||
- Onboarding message can be pinned in private chat.
|
||||
- User-created topics automatically become independent Hermes session lanes.
|
||||
- `/new` in root gives instructions, not a new agent run.
|
||||
- `/new` in a topic creates a new session in that topic and warns that `+` is preferred for parallel work.
|
||||
- `/topic` in root lists unlinked old sessions.
|
||||
- `/topic <session_id>` inside a topic restores that session and sends confirmation + last Hermes assistant message.
|
||||
- Ownership checks prevent restoring other users' sessions.
|
||||
- Already-linked sessions are not restored into a second topic in MVP.
|
||||
- Existing Telegram behavior is unchanged when the feature is disabled.
|
||||
- Tests and docs are included.
|
||||
@@ -40,7 +40,7 @@ This directory contains the integration layer between **hermes-agent's** tool-ca
|
||||
- `evaluate_log()` for saving eval results to JSON + samples.jsonl
|
||||
|
||||
**HermesAgentBaseEnv** (`hermes_base_env.py`) extends BaseEnv with hermes-agent specifics:
|
||||
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, daytona, ssh, singularity)
|
||||
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, ssh, singularity, modal, daytona, vercel_sandbox)
|
||||
- Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` which queries `tools/registry.py`)
|
||||
- Implements `collect_trajectory()` which runs the full agent loop and computes rewards
|
||||
- Supports two-phase operation (Phase 1: OpenAI server, Phase 2: VLLM ManagedServer)
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 115 KiB |
+144
-14
@@ -36,6 +36,26 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
|
||||
return is_truthy_value(value, default=default)
|
||||
|
||||
|
||||
def _coerce_float(value: Any, default: float) -> float:
|
||||
"""Coerce numeric config values, falling back on malformed input."""
|
||||
if value is None:
|
||||
return default
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _coerce_int(value: Any, default: int) -> int:
|
||||
"""Coerce integer config values, falling back on malformed input."""
|
||||
if value is None:
|
||||
return default
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
|
||||
"""Normalize unauthorized DM behavior to a supported value."""
|
||||
if isinstance(value, str):
|
||||
@@ -45,6 +65,15 @@ def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> st
|
||||
return default
|
||||
|
||||
|
||||
def _normalize_notice_delivery(value: Any, default: str = "public") -> str:
|
||||
"""Normalize notice delivery mode to a supported value."""
|
||||
if isinstance(value, str):
|
||||
normalized = value.strip().lower()
|
||||
if normalized in {"public", "private"}:
|
||||
return normalized
|
||||
return default
|
||||
|
||||
|
||||
# Module-level cache for bundled platform plugin names (lives outside the
|
||||
# enum so it doesn't become an accidental enum member).
|
||||
_Platform__bundled_plugin_names: Optional[set] = None
|
||||
@@ -157,18 +186,24 @@ class HomeChannel:
|
||||
Default destination for a platform.
|
||||
|
||||
When a cron job specifies deliver="telegram" without a specific chat ID,
|
||||
messages are sent to this home channel.
|
||||
messages are sent to this home channel. Thread-aware platforms may also
|
||||
store a thread/topic ID so the bare platform target routes to the exact
|
||||
conversation where /sethome was run.
|
||||
"""
|
||||
platform: Platform
|
||||
chat_id: str
|
||||
name: str # Human-readable name for display
|
||||
thread_id: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
result = {
|
||||
"platform": self.platform.value,
|
||||
"chat_id": self.chat_id,
|
||||
"name": self.name,
|
||||
}
|
||||
if self.thread_id:
|
||||
result["thread_id"] = self.thread_id
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "HomeChannel":
|
||||
@@ -176,6 +211,7 @@ class HomeChannel:
|
||||
platform=Platform(data["platform"]),
|
||||
chat_id=str(data["chat_id"]),
|
||||
name=data.get("name", "Home"),
|
||||
thread_id=str(data["thread_id"]) if data.get("thread_id") else None,
|
||||
)
|
||||
|
||||
|
||||
@@ -235,15 +271,23 @@ class PlatformConfig:
|
||||
# - "first": Only first chunk threads to user's message (default)
|
||||
# - "all": All chunks in multi-part replies thread to user's message
|
||||
reply_to_mode: str = "first"
|
||||
|
||||
|
||||
# Whether the gateway is allowed to send "♻️ Gateway online" /
|
||||
# "♻ Gateway restarted" lifecycle notifications on this platform.
|
||||
# Default True preserves prior behavior. Set False on platforms used
|
||||
# by end users (e.g. Slack) where operator-flavored restart pings are
|
||||
# noise; keep True for back-channels where the operator wants them.
|
||||
gateway_restart_notification: bool = True
|
||||
|
||||
# Platform-specific settings
|
||||
extra: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
result = {
|
||||
"enabled": self.enabled,
|
||||
"extra": self.extra,
|
||||
"reply_to_mode": self.reply_to_mode,
|
||||
"gateway_restart_notification": self.gateway_restart_notification,
|
||||
}
|
||||
if self.token:
|
||||
result["token"] = self.token
|
||||
@@ -252,19 +296,22 @@ class PlatformConfig:
|
||||
if self.home_channel:
|
||||
result["home_channel"] = self.home_channel.to_dict()
|
||||
return result
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "PlatformConfig":
|
||||
home_channel = None
|
||||
if "home_channel" in data:
|
||||
home_channel = HomeChannel.from_dict(data["home_channel"])
|
||||
|
||||
|
||||
return cls(
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
token=data.get("token"),
|
||||
api_key=data.get("api_key"),
|
||||
home_channel=home_channel,
|
||||
reply_to_mode=data.get("reply_to_mode", "first"),
|
||||
gateway_restart_notification=_coerce_bool(
|
||||
data.get("gateway_restart_notification"), True
|
||||
),
|
||||
extra=data.get("extra", {}),
|
||||
)
|
||||
|
||||
@@ -301,13 +348,13 @@ class StreamingConfig:
|
||||
if not data:
|
||||
return cls()
|
||||
return cls(
|
||||
enabled=data.get("enabled", False),
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
transport=data.get("transport", "edit"),
|
||||
edit_interval=float(data.get("edit_interval", 1.0)),
|
||||
buffer_threshold=int(data.get("buffer_threshold", 40)),
|
||||
edit_interval=_coerce_float(data.get("edit_interval"), 1.0),
|
||||
buffer_threshold=_coerce_int(data.get("buffer_threshold"), 40),
|
||||
cursor=data.get("cursor", " ▉"),
|
||||
fresh_final_after_seconds=float(
|
||||
data.get("fresh_final_after_seconds", 60.0)
|
||||
fresh_final_after_seconds=_coerce_float(
|
||||
data.get("fresh_final_after_seconds"), 60.0
|
||||
),
|
||||
)
|
||||
|
||||
@@ -572,6 +619,17 @@ class GatewayConfig:
|
||||
)
|
||||
return self.unauthorized_dm_behavior
|
||||
|
||||
def get_notice_delivery(self, platform: Optional[Platform] = None) -> str:
|
||||
"""Return the effective notice-delivery mode for a platform."""
|
||||
if platform:
|
||||
platform_cfg = self.platforms.get(platform)
|
||||
if platform_cfg and "notice_delivery" in platform_cfg.extra:
|
||||
return _normalize_notice_delivery(
|
||||
platform_cfg.extra.get("notice_delivery"),
|
||||
"public",
|
||||
)
|
||||
return "public"
|
||||
|
||||
|
||||
def load_gateway_config() -> GatewayConfig:
|
||||
"""
|
||||
@@ -687,6 +745,11 @@ def load_gateway_config() -> GatewayConfig:
|
||||
platform_cfg.get("unauthorized_dm_behavior"),
|
||||
gw_data.get("unauthorized_dm_behavior", "pair"),
|
||||
)
|
||||
if "notice_delivery" in platform_cfg:
|
||||
bridged["notice_delivery"] = _normalize_notice_delivery(
|
||||
platform_cfg.get("notice_delivery"),
|
||||
"public",
|
||||
)
|
||||
if "reply_prefix" in platform_cfg:
|
||||
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
|
||||
if "reply_in_thread" in platform_cfg:
|
||||
@@ -793,12 +856,36 @@ def load_gateway_config() -> GatewayConfig:
|
||||
):
|
||||
if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
|
||||
os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
|
||||
# reply_to_mode: top-level preferred, falls back to extra.reply_to_mode
|
||||
# YAML 1.1 parses bare 'off' as boolean False — coerce to string "off".
|
||||
_discord_extra = discord_cfg.get("extra") if isinstance(discord_cfg.get("extra"), dict) else {}
|
||||
_discord_rtm = (
|
||||
discord_cfg["reply_to_mode"] if "reply_to_mode" in discord_cfg
|
||||
else _discord_extra.get("reply_to_mode")
|
||||
)
|
||||
if _discord_rtm is not None and not os.getenv("DISCORD_REPLY_TO_MODE"):
|
||||
_rtm_str = "off" if _discord_rtm is False else str(_discord_rtm).lower()
|
||||
os.environ["DISCORD_REPLY_TO_MODE"] = _rtm_str
|
||||
|
||||
# Bridge top-level require_mention to Telegram when the telegram: section
|
||||
# does not already provide one. Users often write "require_mention: true"
|
||||
# at the top level alongside group_sessions_per_user, expecting it to work
|
||||
# the same way (#3979).
|
||||
_tl_require_mention = yaml_cfg.get("require_mention")
|
||||
if _tl_require_mention is not None:
|
||||
_tg_section = yaml_cfg.get("telegram") or {}
|
||||
if "require_mention" not in _tg_section:
|
||||
_tg_plat = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
_tg_extra = _tg_plat.setdefault("extra", {})
|
||||
_tg_extra.setdefault("require_mention", _tl_require_mention)
|
||||
|
||||
# Telegram settings → env vars (env vars take precedence)
|
||||
telegram_cfg = yaml_cfg.get("telegram", {})
|
||||
if isinstance(telegram_cfg, dict):
|
||||
if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
|
||||
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
|
||||
# Prefer telegram.require_mention; fall back to the top-level shorthand.
|
||||
_effective_rm = telegram_cfg.get("require_mention", yaml_cfg.get("require_mention"))
|
||||
if _effective_rm is not None and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
|
||||
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(_effective_rm).lower()
|
||||
if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
|
||||
os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
|
||||
frc = telegram_cfg.get("free_response_chats")
|
||||
@@ -815,6 +902,16 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
# reply_to_mode: top-level preferred, falls back to extra.reply_to_mode
|
||||
# YAML 1.1 parses bare 'off' as boolean False — coerce to string "off".
|
||||
_telegram_extra = telegram_cfg.get("extra") if isinstance(telegram_cfg.get("extra"), dict) else {}
|
||||
_telegram_rtm = (
|
||||
telegram_cfg["reply_to_mode"] if "reply_to_mode" in telegram_cfg
|
||||
else _telegram_extra.get("reply_to_mode")
|
||||
)
|
||||
if _telegram_rtm is not None and not os.getenv("TELEGRAM_REPLY_TO_MODE"):
|
||||
_rtm_str = "off" if _telegram_rtm is False else str(_telegram_rtm).lower()
|
||||
os.environ["TELEGRAM_REPLY_TO_MODE"] = _rtm_str
|
||||
allowed_users = telegram_cfg.get("allow_from")
|
||||
if allowed_users is not None and not os.getenv("TELEGRAM_ALLOWED_USERS"):
|
||||
if isinstance(allowed_users, list):
|
||||
@@ -900,6 +997,12 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
|
||||
os.environ["MATRIX_DM_MENTION_THREADS"] = str(matrix_cfg["dm_mention_threads"]).lower()
|
||||
|
||||
# Feishu settings → env vars (env vars take precedence)
|
||||
feishu_cfg = yaml_cfg.get("feishu", {})
|
||||
if isinstance(feishu_cfg, dict):
|
||||
if "allow_bots" in feishu_cfg and not os.getenv("FEISHU_ALLOW_BOTS"):
|
||||
os.environ["FEISHU_ALLOW_BOTS"] = str(feishu_cfg["allow_bots"]).lower()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to process config.yaml — falling back to .env / gateway.json values. "
|
||||
@@ -1020,6 +1123,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id=telegram_home,
|
||||
name=os.getenv("TELEGRAM_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("TELEGRAM_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Discord
|
||||
@@ -1036,6 +1140,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.DISCORD,
|
||||
chat_id=discord_home,
|
||||
name=os.getenv("DISCORD_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("DISCORD_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Reply threading mode for Discord (off/first/all)
|
||||
@@ -1051,7 +1156,15 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
if Platform.WHATSAPP not in config.platforms:
|
||||
config.platforms[Platform.WHATSAPP] = PlatformConfig()
|
||||
config.platforms[Platform.WHATSAPP].enabled = True
|
||||
|
||||
whatsapp_home = os.getenv("WHATSAPP_HOME_CHANNEL")
|
||||
if whatsapp_home and Platform.WHATSAPP in config.platforms:
|
||||
config.platforms[Platform.WHATSAPP].home_channel = HomeChannel(
|
||||
platform=Platform.WHATSAPP,
|
||||
chat_id=whatsapp_home,
|
||||
name=os.getenv("WHATSAPP_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WHATSAPP_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Slack
|
||||
slack_token = os.getenv("SLACK_BOT_TOKEN")
|
||||
if slack_token:
|
||||
@@ -1077,6 +1190,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.SLACK,
|
||||
chat_id=slack_home,
|
||||
name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""),
|
||||
thread_id=os.getenv("SLACK_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Signal
|
||||
@@ -1097,6 +1211,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.SIGNAL,
|
||||
chat_id=signal_home,
|
||||
name=os.getenv("SIGNAL_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("SIGNAL_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Mattermost
|
||||
@@ -1116,6 +1231,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.MATTERMOST,
|
||||
chat_id=mattermost_home,
|
||||
name=os.getenv("MATTERMOST_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("MATTERMOST_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Matrix
|
||||
@@ -1147,6 +1263,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.MATRIX,
|
||||
chat_id=matrix_home,
|
||||
name=os.getenv("MATRIX_HOME_ROOM_NAME", "Home"),
|
||||
thread_id=os.getenv("MATRIX_HOME_ROOM_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Home Assistant
|
||||
@@ -1180,6 +1297,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.EMAIL,
|
||||
chat_id=email_home,
|
||||
name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"),
|
||||
thread_id=os.getenv("EMAIL_HOME_ADDRESS_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# SMS (Twilio)
|
||||
@@ -1195,6 +1313,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.SMS,
|
||||
chat_id=sms_home,
|
||||
name=os.getenv("SMS_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("SMS_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# API Server
|
||||
@@ -1257,6 +1376,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.DINGTALK,
|
||||
chat_id=dingtalk_home,
|
||||
name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("DINGTALK_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Feishu / Lark
|
||||
@@ -1284,6 +1404,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.FEISHU,
|
||||
chat_id=feishu_home,
|
||||
name=os.getenv("FEISHU_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("FEISHU_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# WeCom (Enterprise WeChat)
|
||||
@@ -1306,6 +1427,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.WECOM,
|
||||
chat_id=wecom_home,
|
||||
name=os.getenv("WECOM_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WECOM_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# WeCom callback mode (self-built apps)
|
||||
@@ -1364,6 +1486,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.WEIXIN,
|
||||
chat_id=weixin_home,
|
||||
name=os.getenv("WEIXIN_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WEIXIN_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# BlueBubbles (iMessage)
|
||||
@@ -1387,6 +1510,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.BLUEBUBBLES,
|
||||
chat_id=bluebubbles_home,
|
||||
name=os.getenv("BLUEBUBBLES_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("BLUEBUBBLES_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# QQ (Official Bot API v2)
|
||||
@@ -1424,6 +1548,11 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.QQBOT,
|
||||
chat_id=qq_home,
|
||||
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
|
||||
thread_id=(
|
||||
os.getenv("QQBOT_HOME_CHANNEL_THREAD_ID")
|
||||
or os.getenv("QQ_HOME_CHANNEL_THREAD_ID")
|
||||
or None
|
||||
),
|
||||
)
|
||||
|
||||
# Yuanbao — YUANBAO_APP_ID preferred
|
||||
@@ -1454,6 +1583,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
platform=Platform.YUANBAO,
|
||||
chat_id=yuanbao_home,
|
||||
name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("YUANBAO_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
|
||||
if yuanbao_dm_policy:
|
||||
|
||||
+9
-7
@@ -53,9 +53,10 @@ class DeliveryTarget:
|
||||
- "telegram" → Telegram home channel
|
||||
- "telegram:123456" → specific Telegram chat
|
||||
"""
|
||||
target = target.strip().lower()
|
||||
target_stripped = target.strip()
|
||||
target_lower = target_stripped.lower()
|
||||
|
||||
if target == "origin":
|
||||
if target_lower == "origin":
|
||||
if origin:
|
||||
return cls(
|
||||
platform=origin.platform,
|
||||
@@ -67,13 +68,14 @@ class DeliveryTarget:
|
||||
# Fallback to local if no origin
|
||||
return cls(platform=Platform.LOCAL, is_origin=True)
|
||||
|
||||
if target == "local":
|
||||
if target_lower == "local":
|
||||
return cls(platform=Platform.LOCAL)
|
||||
|
||||
# Check for platform:chat_id or platform:chat_id:thread_id format
|
||||
if ":" in target:
|
||||
parts = target.split(":", 2)
|
||||
platform_str = parts[0]
|
||||
# Use the original case for chat_id/thread_id to preserve case-sensitive IDs
|
||||
if ":" in target_stripped:
|
||||
parts = target_stripped.split(":", 2)
|
||||
platform_str = parts[0].lower() # Platform names are case-insensitive
|
||||
chat_id = parts[1] if len(parts) > 1 else None
|
||||
thread_id = parts[2] if len(parts) > 2 else None
|
||||
try:
|
||||
@@ -85,7 +87,7 @@ class DeliveryTarget:
|
||||
|
||||
# Just a platform name (use home channel)
|
||||
try:
|
||||
platform = Platform(target)
|
||||
platform = Platform(target_lower)
|
||||
return cls(platform=platform)
|
||||
except ValueError:
|
||||
# Unknown platform, treat as local
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
"""Shared HTTP client factory for long-lived platform adapters.
|
||||
|
||||
Gateway messaging platforms (QQ Bot, Feishu, WeCom, DingTalk, Signal,
|
||||
BlueBubbles, WeCom-callback) keep a persistent ``httpx.AsyncClient``
|
||||
alive for the adapter's lifetime. That amortises TLS/connection setup
|
||||
across many API calls, but it also means the process's file-descriptor
|
||||
pressure is sensitive to how aggressively the pool recycles idle keep-
|
||||
alive connections.
|
||||
|
||||
httpx's default ``keepalive_expiry`` is 5 seconds. On macOS behind
|
||||
Cloudflare Warp (and other transparent proxies), peer-initiated FIN can
|
||||
sit in ``CLOSE_WAIT`` longer than that before the local socket actually
|
||||
drains — which, multiplied across 7 long-lived adapters plus the LLM
|
||||
client and MCP clients, walks straight into the default 256 fd limit.
|
||||
See #18451.
|
||||
|
||||
``platform_httpx_limits()`` returns a tighter ``httpx.Limits`` the
|
||||
adapter factories use instead of the httpx default. The values chosen:
|
||||
|
||||
* ``max_keepalive_connections=10`` — plenty for any single adapter;
|
||||
platform APIs rarely parallelise beyond this.
|
||||
* ``keepalive_expiry=2.0`` — close idle sockets aggressively so a
|
||||
proxy's lingering CLOSE_WAIT window can't starve the process.
|
||||
|
||||
Override via ``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` /
|
||||
``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` env vars when tuning under load.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError: # pragma: no cover — optional dep
|
||||
httpx = None # type: ignore[assignment]
|
||||
|
||||
|
||||
_DEFAULT_KEEPALIVE_EXPIRY_S = 2.0
|
||||
_DEFAULT_MAX_KEEPALIVE = 10
|
||||
|
||||
|
||||
def platform_httpx_limits() -> "httpx.Limits | None":
|
||||
"""Return ``httpx.Limits`` tuned for persistent platform-adapter clients.
|
||||
|
||||
Returns ``None`` when httpx isn't importable, so callers can fall
|
||||
back to httpx's built-in default without a hard dependency on this
|
||||
helper being reachable.
|
||||
"""
|
||||
if httpx is None:
|
||||
return None
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
raw = os.environ.get(name, "").strip()
|
||||
if not raw:
|
||||
return default
|
||||
try:
|
||||
val = float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return val if val > 0 else default
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
raw = os.environ.get(name, "").strip()
|
||||
if not raw:
|
||||
return default
|
||||
try:
|
||||
val = int(raw)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return val if val > 0 else default
|
||||
|
||||
keepalive_expiry = _env_float(
|
||||
"HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", _DEFAULT_KEEPALIVE_EXPIRY_S
|
||||
)
|
||||
max_keepalive = _env_int(
|
||||
"HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", _DEFAULT_MAX_KEEPALIVE
|
||||
)
|
||||
|
||||
return httpx.Limits(
|
||||
max_keepalive_connections=max_keepalive,
|
||||
# Leave max_connections at httpx default (100) — plenty of headroom.
|
||||
keepalive_expiry=keepalive_expiry,
|
||||
)
|
||||
+304
-33
@@ -2,8 +2,8 @@
|
||||
OpenAI-compatible API server platform adapter.
|
||||
|
||||
Exposes an HTTP server with endpoints:
|
||||
- POST /v1/chat/completions — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header)
|
||||
- POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id)
|
||||
- POST /v1/chat/completions — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header; opt-in long-term memory scoping via X-Hermes-Session-Key header)
|
||||
- POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id; X-Hermes-Session-Key supported)
|
||||
- GET /v1/responses/{response_id} — Retrieve a stored response
|
||||
- DELETE /v1/responses/{response_id} — Delete a stored response
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
@@ -56,12 +56,20 @@ logger = logging.getLogger(__name__)
|
||||
DEFAULT_HOST = "127.0.0.1"
|
||||
DEFAULT_PORT = 8642
|
||||
MAX_STORED_RESPONSES = 100
|
||||
MAX_REQUEST_BYTES = 1_000_000 # 1 MB default limit for POST bodies
|
||||
MAX_REQUEST_BYTES = 10_000_000 # 10 MB — accommodates long agent conversations with tool calls
|
||||
CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0
|
||||
MAX_NORMALIZED_TEXT_LENGTH = 65_536 # 64 KB cap for normalized content parts
|
||||
MAX_CONTENT_LIST_SIZE = 1_000 # Max items when content is an array
|
||||
|
||||
|
||||
def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int:
|
||||
"""Parse a listen port without letting malformed env/config values crash startup."""
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _normalize_chat_content(
|
||||
content: Any, *, _max_depth: int = 10, _depth: int = 0,
|
||||
) -> str:
|
||||
@@ -573,7 +581,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
super().__init__(config, Platform.API_SERVER)
|
||||
extra = config.extra or {}
|
||||
self._host: str = extra.get("host", os.getenv("API_SERVER_HOST", DEFAULT_HOST))
|
||||
self._port: int = int(extra.get("port", os.getenv("API_SERVER_PORT", str(DEFAULT_PORT))))
|
||||
raw_port = extra.get("port")
|
||||
if raw_port is None:
|
||||
raw_port = os.getenv("API_SERVER_PORT", str(DEFAULT_PORT))
|
||||
self._port: int = _coerce_port(raw_port, DEFAULT_PORT)
|
||||
self._api_key: str = extra.get("key", os.getenv("API_SERVER_KEY", ""))
|
||||
self._cors_origins: tuple[str, ...] = self._parse_cors_origins(
|
||||
extra.get("cors_origins", os.getenv("API_SERVER_CORS_ORIGINS", "")),
|
||||
@@ -687,6 +698,71 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
status=401,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session header helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# Soft length cap for session identifiers. Headers are bounded in
|
||||
# aggregate by aiohttp (``client_max_size`` / default 8 KiB per
|
||||
# header), but we impose a tighter limit on the session headers so a
|
||||
# caller can't burn memory by passing a multi-kilobyte "session key".
|
||||
# 256 chars is well above any realistic stable channel identifier
|
||||
# (e.g. ``agent:main:webui:dm:user-42``) while staying small enough
|
||||
# that the sanitized form is safe to pass into Honcho / state.db.
|
||||
_MAX_SESSION_HEADER_LEN = 256
|
||||
|
||||
def _parse_session_key_header(
|
||||
self, request: "web.Request"
|
||||
) -> tuple[Optional[str], Optional["web.Response"]]:
|
||||
"""Extract and validate the ``X-Hermes-Session-Key`` header.
|
||||
|
||||
The session key is a stable per-channel identifier that scopes
|
||||
long-term memory (e.g. Honcho sessions) across transcripts. It
|
||||
is independent of ``X-Hermes-Session-Id``: callers may send
|
||||
either, both, or neither.
|
||||
|
||||
Returns ``(session_key, None)`` on success (with an empty/absent
|
||||
header yielding ``None`` for the key), or ``(None, error_response)``
|
||||
on validation failure.
|
||||
|
||||
Security: like session continuation, accepting a caller-supplied
|
||||
memory scope requires API-key authentication so that an
|
||||
unauthenticated client on a local-only server can't inject itself
|
||||
into another user's long-term memory scope by guessing a key.
|
||||
"""
|
||||
raw = request.headers.get("X-Hermes-Session-Key", "").strip()
|
||||
if not raw:
|
||||
return None, None
|
||||
|
||||
if not self._api_key:
|
||||
logger.warning(
|
||||
"X-Hermes-Session-Key rejected: no API key configured. "
|
||||
"Set API_SERVER_KEY to enable long-term memory scoping."
|
||||
)
|
||||
return None, web.json_response(
|
||||
_openai_error(
|
||||
"X-Hermes-Session-Key requires API key authentication. "
|
||||
"Configure API_SERVER_KEY to enable this feature."
|
||||
),
|
||||
status=403,
|
||||
)
|
||||
|
||||
# Reject control characters that could enable header injection on
|
||||
# the echo path.
|
||||
if re.search(r'[\r\n\x00]', raw):
|
||||
return None, web.json_response(
|
||||
{"error": {"message": "Invalid session key", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
if len(raw) > self._MAX_SESSION_HEADER_LEN:
|
||||
return None, web.json_response(
|
||||
{"error": {"message": "Session key too long", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
return raw, None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session DB helper
|
||||
# ------------------------------------------------------------------
|
||||
@@ -717,6 +793,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
tool_progress_callback=None,
|
||||
tool_start_callback=None,
|
||||
tool_complete_callback=None,
|
||||
gateway_session_key: Optional[str] = None,
|
||||
) -> Any:
|
||||
"""
|
||||
Create an AIAgent instance using the gateway's runtime config.
|
||||
@@ -725,12 +802,20 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
base_url, etc. from config.yaml / env vars. Toolsets are resolved
|
||||
from config.yaml platform_toolsets.api_server (same as all other
|
||||
gateway platforms), falling back to the hermes-api-server default.
|
||||
|
||||
``gateway_session_key`` is a stable per-channel identifier supplied
|
||||
by the client (via ``X-Hermes-Session-Key``). Unlike ``session_id``
|
||||
which scopes the short-term transcript and rotates on /new, this
|
||||
key is meant to persist across transcripts so long-term memory
|
||||
providers (e.g. Honcho) can scope their per-chat state correctly
|
||||
— matching the semantics of the native gateway's ``session_key``.
|
||||
"""
|
||||
from run_agent import AIAgent
|
||||
from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config
|
||||
from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner
|
||||
from hermes_cli.tools_config import _get_platform_tools
|
||||
|
||||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||||
reasoning_config = GatewayRunner._load_reasoning_config()
|
||||
model = _resolve_gateway_model()
|
||||
|
||||
user_config = _load_gateway_config()
|
||||
@@ -740,7 +825,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
# Load fallback provider chain so the API server platform has the
|
||||
# same fallback behaviour as Telegram/Discord/Slack (fixes #4954).
|
||||
from gateway.run import GatewayRunner
|
||||
fallback_model = GatewayRunner._load_fallback_model()
|
||||
|
||||
agent = AIAgent(
|
||||
@@ -759,6 +843,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
tool_complete_callback=tool_complete_callback,
|
||||
session_db=self._ensure_session_db(),
|
||||
fallback_model=fallback_model,
|
||||
reasoning_config=reasoning_config,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
return agent
|
||||
|
||||
@@ -842,6 +928,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"run_stop": True,
|
||||
"tool_progress_events": True,
|
||||
"session_continuity_header": "X-Hermes-Session-Id",
|
||||
"session_key_header": "X-Hermes-Session-Key",
|
||||
"cors": bool(self._cors_origins),
|
||||
},
|
||||
"endpoints": {
|
||||
@@ -913,6 +1000,15 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
status=400,
|
||||
)
|
||||
|
||||
# Allow caller to scope long-term memory (e.g. Honcho) with a
|
||||
# stable per-channel identifier via X-Hermes-Session-Key. This
|
||||
# is independent of X-Hermes-Session-Id: the key persists across
|
||||
# transcripts while the id rotates when the caller starts a new
|
||||
# transcript (i.e. /new semantics). See _parse_session_key_header.
|
||||
gateway_session_key, key_err = self._parse_session_key_header(request)
|
||||
if key_err is not None:
|
||||
return key_err
|
||||
|
||||
# Allow caller to continue an existing session by passing X-Hermes-Session-Id.
|
||||
# When provided, history is loaded from state.db instead of from the request body.
|
||||
#
|
||||
@@ -1047,11 +1143,13 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
tool_start_callback=_on_tool_start,
|
||||
tool_complete_callback=_on_tool_complete,
|
||||
agent_ref=agent_ref,
|
||||
gateway_session_key=gateway_session_key,
|
||||
))
|
||||
|
||||
return await self._write_sse_chat_completion(
|
||||
request, completion_id, model_name, created, _stream_q,
|
||||
agent_task, agent_ref, session_id=session_id,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
|
||||
# Non-streaming: run the agent (with optional Idempotency-Key)
|
||||
@@ -1061,6 +1159,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
conversation_history=history,
|
||||
ephemeral_system_prompt=system_prompt,
|
||||
session_id=session_id,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
|
||||
idempotency_key = request.headers.get("Idempotency-Key")
|
||||
@@ -1110,11 +1209,17 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
},
|
||||
}
|
||||
|
||||
return web.json_response(response_data, headers={"X-Hermes-Session-Id": session_id})
|
||||
response_headers = {
|
||||
"X-Hermes-Session-Id": result.get("session_id", session_id),
|
||||
}
|
||||
if gateway_session_key:
|
||||
response_headers["X-Hermes-Session-Key"] = gateway_session_key
|
||||
return web.json_response(response_data, headers=response_headers)
|
||||
|
||||
async def _write_sse_chat_completion(
|
||||
self, request: "web.Request", completion_id: str, model: str,
|
||||
created: int, stream_q, agent_task, agent_ref=None, session_id: str = None,
|
||||
gateway_session_key: str = None,
|
||||
) -> "web.StreamResponse":
|
||||
"""Write real streaming SSE from agent's stream_delta_callback queue.
|
||||
|
||||
@@ -1137,6 +1242,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
sse_headers.update(cors)
|
||||
if session_id:
|
||||
sse_headers["X-Hermes-Session-Id"] = session_id
|
||||
if gateway_session_key:
|
||||
sse_headers["X-Hermes-Session-Key"] = gateway_session_key
|
||||
response = web.StreamResponse(status=200, headers=sse_headers)
|
||||
await response.prepare(request)
|
||||
|
||||
@@ -1242,6 +1349,22 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
logger.info("SSE client disconnected; interrupted agent task %s", completion_id)
|
||||
except Exception as _exc:
|
||||
# Agent crashed mid-stream. Try to emit an error chunk
|
||||
# so the client gets a proper response instead of a
|
||||
# TransferEncodingError from incomplete chunked encoding.
|
||||
import traceback as _tb
|
||||
logger.error("Agent crashed mid-stream for %s: %s", completion_id, _tb.format_exc()[:300])
|
||||
try:
|
||||
error_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {}, "finish_reason": "error"}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(error_chunk)}\n\n".encode())
|
||||
await response.write(b"data: [DONE]\n\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return response
|
||||
|
||||
@@ -1260,6 +1383,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
conversation: Optional[str],
|
||||
store: bool,
|
||||
session_id: str,
|
||||
gateway_session_key: Optional[str] = None,
|
||||
) -> "web.StreamResponse":
|
||||
"""Write an SSE stream for POST /v1/responses (OpenAI Responses API).
|
||||
|
||||
@@ -1302,6 +1426,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
sse_headers.update(cors)
|
||||
if session_id:
|
||||
sse_headers["X-Hermes-Session-Id"] = session_id
|
||||
if gateway_session_key:
|
||||
sse_headers["X-Hermes-Session-Key"] = gateway_session_key
|
||||
response = web.StreamResponse(status=200, headers=sse_headers)
|
||||
await response.prepare(request)
|
||||
|
||||
@@ -1559,20 +1685,54 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
async def _dispatch(it) -> None:
|
||||
"""Route a queue item to the correct SSE emitter.
|
||||
|
||||
Plain strings are text deltas. Tagged tuples with
|
||||
``__tool_started__`` / ``__tool_completed__`` prefixes
|
||||
are tool lifecycle events.
|
||||
Plain strings are text deltas — they are batched (50ms)
|
||||
to reduce Open WebUI re-render storms. Tagged tuples
|
||||
with ``__tool_started__`` / ``__tool_completed__``
|
||||
prefixes are tool lifecycle events and flush the buffer
|
||||
before emitting.
|
||||
"""
|
||||
nonlocal _batch_timer
|
||||
if isinstance(it, tuple) and len(it) == 2 and isinstance(it[0], str):
|
||||
tag, payload = it
|
||||
# Flush batched text before tool events
|
||||
if _batch_buf:
|
||||
await _flush_batch()
|
||||
if tag == "__tool_started__":
|
||||
await _emit_tool_started(payload)
|
||||
elif tag == "__tool_completed__":
|
||||
await _emit_tool_completed(payload)
|
||||
# Unknown tags are silently ignored (forward-compat).
|
||||
elif isinstance(it, str):
|
||||
await _emit_text_delta(it)
|
||||
# Other types (non-string, non-tuple) are silently dropped.
|
||||
# Batch text deltas — append to buffer, flush on timer
|
||||
_batch_buf.append(it)
|
||||
if _batch_timer is None:
|
||||
_batch_timer = asyncio.create_task(_batch_flush_after(0.05))
|
||||
# Other types are silently dropped.
|
||||
|
||||
# ── Batching state ──
|
||||
_batch_buf: List[str] = []
|
||||
_batch_timer: Optional[asyncio.Task] = None
|
||||
_batch_lock = asyncio.Lock()
|
||||
|
||||
async def _batch_flush_after(delay: float) -> None:
|
||||
"""Wait delay seconds, then flush accumulated text deltas."""
|
||||
try:
|
||||
await asyncio.sleep(delay)
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
# Clear timer reference BEFORE flush so new deltas
|
||||
# can start a fresh timer while we emit
|
||||
nonlocal _batch_buf, _batch_timer
|
||||
_batch_timer = None
|
||||
await _flush_batch()
|
||||
|
||||
async def _flush_batch() -> None:
|
||||
"""Emit a single SSE delta for all accumulated text."""
|
||||
nonlocal _batch_buf
|
||||
async with _batch_lock:
|
||||
if _batch_buf:
|
||||
combined = "".join(_batch_buf)
|
||||
_batch_buf = []
|
||||
await _emit_text_delta(combined)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
while True:
|
||||
@@ -1597,11 +1757,21 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
continue
|
||||
|
||||
if item is None: # EOS sentinel
|
||||
# Cancel pending timer and flush remaining batched text
|
||||
if _batch_timer and not _batch_timer.done():
|
||||
_batch_timer.cancel()
|
||||
_batch_timer = None
|
||||
if _batch_buf:
|
||||
await _flush_batch()
|
||||
break
|
||||
|
||||
await _dispatch(item)
|
||||
last_activity = time.monotonic()
|
||||
|
||||
# Flush any final batched text before processing result
|
||||
if _batch_buf:
|
||||
await _flush_batch()
|
||||
|
||||
# Pick up agent result + usage from the completed task
|
||||
try:
|
||||
result, agent_usage = await agent_task
|
||||
@@ -1652,6 +1822,31 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# payload still see the assistant text. This mirrors the
|
||||
# shape produced by _extract_output_items in the batch path.
|
||||
final_items: List[Dict[str, Any]] = list(emitted_items)
|
||||
|
||||
# Trim large content from tool call arguments to keep the
|
||||
# response.completed event under ~100KB. Clients already
|
||||
# received full details via incremental events.
|
||||
for _item in final_items:
|
||||
if _item.get("type") == "function_call":
|
||||
try:
|
||||
_args = json.loads(_item.get("arguments", "{}")) if isinstance(_item.get("arguments"), str) else _item.get("arguments", {})
|
||||
if isinstance(_args, dict):
|
||||
for _k in ("content", "query", "pattern", "old_string", "new_string"):
|
||||
if isinstance(_args.get(_k), str) and len(_args[_k]) > 500:
|
||||
_args[_k] = "[" + str(len(_args[_k])) + " chars — truncated for response.completed]"
|
||||
_item["arguments"] = json.dumps(_args)
|
||||
except Exception:
|
||||
pass
|
||||
elif _item.get("type") == "function_call_output":
|
||||
_output = _item.get("output", [])
|
||||
if isinstance(_output, list) and _output:
|
||||
_first = _output[0]
|
||||
if isinstance(_first, dict) and _first.get("type") == "input_text":
|
||||
_text = _first.get("text", "")
|
||||
if len(_text) > 1000:
|
||||
_first["text"] = _text[:500] + "...[" + str(len(_text) - 500) + " more chars]"
|
||||
_item["output"] = [_first]
|
||||
|
||||
final_items.append({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
@@ -1742,6 +1937,30 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
agent_task.cancel()
|
||||
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
|
||||
raise
|
||||
except Exception as _exc:
|
||||
# Agent crashed with an unhandled error (e.g. model API error like
|
||||
# BadRequestError, AuthenticationError). Emit a response.failed
|
||||
# event and properly terminate the SSE stream so the client doesn't
|
||||
# get a TransferEncodingError from incomplete chunked encoding.
|
||||
import traceback as _tb
|
||||
_persist_incomplete_if_needed()
|
||||
agent_error = _tb.format_exc()
|
||||
try:
|
||||
failed_env = _envelope("failed")
|
||||
failed_env["output"] = list(emitted_items)
|
||||
failed_env["error"] = {"message": str(_exc)[:500], "type": "server_error"}
|
||||
failed_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
await _write_event("response.failed", {
|
||||
"type": "response.failed",
|
||||
"response": failed_env,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
logger.error("Agent crashed mid-stream for %s: %s", response_id, str(agent_error)[:300])
|
||||
|
||||
return response
|
||||
|
||||
@@ -1751,6 +1970,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
# Long-term memory scope header (see chat_completions for details).
|
||||
gateway_session_key, key_err = self._parse_session_key_header(request)
|
||||
if key_err is not None:
|
||||
return key_err
|
||||
|
||||
# Parse request body
|
||||
try:
|
||||
body = await request.json()
|
||||
@@ -1902,6 +2126,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
tool_start_callback=_on_tool_start,
|
||||
tool_complete_callback=_on_tool_complete,
|
||||
agent_ref=agent_ref,
|
||||
gateway_session_key=gateway_session_key,
|
||||
))
|
||||
|
||||
response_id = f"resp_{uuid.uuid4().hex[:28]}"
|
||||
@@ -1922,6 +2147,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
conversation=conversation,
|
||||
store=store,
|
||||
session_id=session_id,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
|
||||
async def _compute_response():
|
||||
@@ -1930,6 +2156,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
conversation_history=conversation_history,
|
||||
ephemeral_system_prompt=instructions,
|
||||
session_id=session_id,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
|
||||
idempotency_key = request.headers.get("Idempotency-Key")
|
||||
@@ -2004,7 +2231,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
return web.json_response(response_data)
|
||||
response_headers = {"X-Hermes-Session-Id": session_id}
|
||||
if gateway_session_key:
|
||||
response_headers["X-Hermes-Session-Key"] = gateway_session_key
|
||||
return web.json_response(response_data, headers=response_headers)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# GET / DELETE response endpoints
|
||||
@@ -2326,6 +2556,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
tool_start_callback=None,
|
||||
tool_complete_callback=None,
|
||||
agent_ref: Optional[list] = None,
|
||||
gateway_session_key: Optional[str] = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Create an agent and run a conversation in a thread executor.
|
||||
@@ -2348,19 +2579,27 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
tool_progress_callback=tool_progress_callback,
|
||||
tool_start_callback=tool_start_callback,
|
||||
tool_complete_callback=tool_complete_callback,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
if agent_ref is not None:
|
||||
agent_ref[0] = agent
|
||||
effective_task_id = session_id or str(uuid.uuid4())
|
||||
result = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
usage = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
"output_tokens": getattr(agent, "session_completion_tokens", 0) or 0,
|
||||
"total_tokens": getattr(agent, "session_total_tokens", 0) or 0,
|
||||
}
|
||||
# Include the effective session ID in the result so callers
|
||||
# (e.g. X-Hermes-Session-Id header) can track compression-
|
||||
# triggered session rotations. (#16938)
|
||||
_eff_sid = getattr(agent, "session_id", session_id)
|
||||
if isinstance(_eff_sid, str) and _eff_sid:
|
||||
result["session_id"] = _eff_sid
|
||||
return result, usage
|
||||
|
||||
return await loop.run_in_executor(None, _run)
|
||||
@@ -2440,6 +2679,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
# Long-term memory scope header (see chat_completions for details).
|
||||
gateway_session_key, key_err = self._parse_session_key_header(request)
|
||||
if key_err is not None:
|
||||
return key_err
|
||||
|
||||
# Enforce concurrency limit
|
||||
if len(self._run_streams) >= self._MAX_CONCURRENT_RUNS:
|
||||
return web.json_response(
|
||||
@@ -2548,13 +2792,15 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
session_id=session_id,
|
||||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
def _run_sync():
|
||||
effective_task_id = session_id or run_id
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
u = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
@@ -2564,21 +2810,39 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
return r, u
|
||||
|
||||
result, usage = await asyncio.get_running_loop().run_in_executor(None, _run_sync)
|
||||
final_response = result.get("final_response", "") if isinstance(result, dict) else ""
|
||||
q.put_nowait({
|
||||
"event": "run.completed",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"output": final_response,
|
||||
"usage": usage,
|
||||
})
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"completed",
|
||||
output=final_response,
|
||||
usage=usage,
|
||||
last_event="run.completed",
|
||||
)
|
||||
# Check for structured failure (non-retryable client errors like
|
||||
# 401/400 return failed=True instead of raising, so the except
|
||||
# block below never fires — issue #15561).
|
||||
if isinstance(result, dict) and result.get("failed"):
|
||||
error_msg = result.get("error") or "agent run failed"
|
||||
q.put_nowait({
|
||||
"event": "run.failed",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"error": error_msg,
|
||||
})
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"failed",
|
||||
error=error_msg,
|
||||
last_event="run.failed",
|
||||
)
|
||||
else:
|
||||
final_response = result.get("final_response", "") if isinstance(result, dict) else ""
|
||||
q.put_nowait({
|
||||
"event": "run.completed",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"output": final_response,
|
||||
"usage": usage,
|
||||
})
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"completed",
|
||||
output=final_response,
|
||||
usage=usage,
|
||||
last_event="run.completed",
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
@@ -2629,7 +2893,14 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
|
||||
return web.json_response({"run_id": run_id, "status": "started"}, status=202)
|
||||
response_headers = (
|
||||
{"X-Hermes-Session-Key": gateway_session_key} if gateway_session_key else {}
|
||||
)
|
||||
return web.json_response(
|
||||
{"run_id": run_id, "status": "started"},
|
||||
status=202,
|
||||
headers=response_headers,
|
||||
)
|
||||
|
||||
async def _handle_get_run(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /v1/runs/{run_id} — return pollable run status for external UIs."""
|
||||
@@ -2773,7 +3044,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
try:
|
||||
mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
|
||||
self._app = web.Application(middlewares=mws)
|
||||
self._app = web.Application(middlewares=mws, client_max_size=MAX_REQUEST_BYTES)
|
||||
self._app["api_server_adapter"] = self
|
||||
self._app.router.add_get("/health", self._handle_health)
|
||||
self._app.router.add_get("/health/detailed", self._handle_health_detailed)
|
||||
|
||||
+256
-23
@@ -416,7 +416,7 @@ def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = Non
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
|
||||
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple, Union
|
||||
from enum import Enum
|
||||
|
||||
from pathlib import Path as _Path
|
||||
@@ -981,7 +981,7 @@ def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
|
||||
return
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass
|
||||
class SendResult:
|
||||
"""Result of sending a message."""
|
||||
success: bool
|
||||
@@ -991,6 +991,45 @@ class SendResult:
|
||||
retryable: bool = False # True for transient connection errors — base will retry automatically
|
||||
|
||||
|
||||
class EphemeralReply(str):
|
||||
"""System-notice reply that auto-deletes after a TTL.
|
||||
|
||||
Slash-command handlers in ``gateway/run.py`` can return this wrapper
|
||||
instead of a plain string to request that the reply message be deleted
|
||||
after ``ttl_seconds`` on platforms that support ``delete_message``.
|
||||
|
||||
Subclassing ``str`` keeps the wrapper transparent to anything that
|
||||
treats handler return values as text (existing tests use ``in`` /
|
||||
``startswith`` / equality; the ``_process_message_background`` pipeline
|
||||
extracts attachments from the string content). ``isinstance(r,
|
||||
EphemeralReply)`` still distinguishes ephemeral replies from plain
|
||||
strings so the send path can schedule deletion.
|
||||
|
||||
Platforms that don't override :meth:`BasePlatformAdapter.delete_message`
|
||||
silently ignore the TTL — the message is sent normally and left in
|
||||
place. When ``ttl_seconds`` is ``None``, the pipeline uses the
|
||||
configured ``display.ephemeral_system_ttl`` default. A default of ``0``
|
||||
disables auto-deletion globally, preserving prior behavior.
|
||||
"""
|
||||
|
||||
ttl_seconds: Optional[int]
|
||||
|
||||
def __new__(cls, text: str, ttl_seconds: Optional[int] = None):
|
||||
instance = super().__new__(cls, text)
|
||||
instance.ttl_seconds = ttl_seconds
|
||||
return instance
|
||||
|
||||
@property
|
||||
def text(self) -> str:
|
||||
"""Return the underlying text.
|
||||
|
||||
Provided for call sites that want an explicit string conversion,
|
||||
though ``str(reply)`` and using ``reply`` directly where a string
|
||||
is expected both work identically.
|
||||
"""
|
||||
return str.__str__(self)
|
||||
|
||||
|
||||
def merge_pending_message_event(
|
||||
pending_messages: Dict[str, MessageEvent],
|
||||
session_key: str,
|
||||
@@ -1034,6 +1073,11 @@ def merge_pending_message_event(
|
||||
existing.text = event.text
|
||||
if existing_is_photo or incoming_is_photo:
|
||||
existing.message_type = MessageType.PHOTO
|
||||
elif (
|
||||
getattr(existing, "message_type", None) == MessageType.TEXT
|
||||
and event.message_type != MessageType.TEXT
|
||||
):
|
||||
existing.message_type = event.message_type
|
||||
return
|
||||
|
||||
if (
|
||||
@@ -1068,8 +1112,10 @@ _RETRYABLE_ERROR_PATTERNS = (
|
||||
)
|
||||
|
||||
|
||||
# Type for message handlers
|
||||
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
|
||||
# Type for message handlers. Handlers may return a plain string (normal
|
||||
# reply), an ``EphemeralReply`` to opt the reply into auto-deletion, or
|
||||
# ``None`` when the response was already delivered (e.g. via streaming).
|
||||
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[Union[str, "EphemeralReply"]]]]
|
||||
|
||||
|
||||
def resolve_channel_prompt(
|
||||
@@ -1454,6 +1500,64 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
return False
|
||||
|
||||
def _get_ephemeral_system_ttl_default(self) -> int:
|
||||
"""Read ``display.ephemeral_system_ttl`` from config.
|
||||
|
||||
Returns the TTL in seconds to use when an :class:`EphemeralReply`
|
||||
does not specify one explicitly. ``0`` (the default) disables
|
||||
auto-deletion. Non-fatal if config is unreadable.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_config
|
||||
except Exception:
|
||||
return 0
|
||||
try:
|
||||
cfg = _load_config()
|
||||
except Exception:
|
||||
return 0
|
||||
display = cfg.get("display", {}) if isinstance(cfg, dict) else {}
|
||||
if not isinstance(display, dict):
|
||||
return 0
|
||||
raw = display.get("ephemeral_system_ttl", 0)
|
||||
try:
|
||||
return int(raw)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
def _schedule_ephemeral_delete(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
ttl_seconds: int,
|
||||
) -> None:
|
||||
"""Spawn a detached task that deletes ``message_id`` after ``ttl_seconds``.
|
||||
|
||||
Best-effort — failures (gateway restart, permission denied, message
|
||||
too old for Telegram's 48h window) are swallowed at debug level.
|
||||
Does not block the caller.
|
||||
"""
|
||||
|
||||
async def _run_delete() -> None:
|
||||
try:
|
||||
await asyncio.sleep(max(1, int(ttl_seconds)))
|
||||
await self.delete_message(chat_id=chat_id, message_id=message_id)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"[%s] Ephemeral delete failed for %s/%s: %s",
|
||||
self.name, chat_id, message_id, e,
|
||||
)
|
||||
|
||||
coro = _run_delete()
|
||||
try:
|
||||
asyncio.create_task(coro)
|
||||
except RuntimeError:
|
||||
# No running loop (e.g. unit tests that never reach the async
|
||||
# path). Close the coroutine cleanly so Python doesn't warn
|
||||
# about it never being awaited, then drop silently.
|
||||
coro.close()
|
||||
|
||||
async def send_slash_confirm(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1489,6 +1593,26 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
return SendResult(success=False, error="Not supported")
|
||||
|
||||
async def send_private_notice(
|
||||
self,
|
||||
chat_id: str,
|
||||
user_id: Optional[str],
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a notice privately when the platform supports it.
|
||||
|
||||
The default implementation falls back to a normal send so callers can
|
||||
use one code path across platforms.
|
||||
"""
|
||||
return await self.send(
|
||||
chat_id=chat_id,
|
||||
content=content,
|
||||
reply_to=reply_to,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
async def send_typing(self, chat_id: str, metadata=None) -> None:
|
||||
"""
|
||||
Send a typing indicator.
|
||||
@@ -2043,6 +2167,28 @@ class BasePlatformAdapter(ABC):
|
||||
lowered = error.lower()
|
||||
return "timed out" in lowered or "readtimeout" in lowered or "writetimeout" in lowered
|
||||
|
||||
def _unwrap_ephemeral(self, response: Any) -> Tuple[Optional[str], int]:
|
||||
"""Unwrap a handler response into (text, ttl_seconds).
|
||||
|
||||
Accepts a plain string, ``None``, or an :class:`EphemeralReply`.
|
||||
Returns ``(text, ttl)`` where ``ttl > 0`` means the caller should
|
||||
schedule a deletion via :meth:`_schedule_ephemeral_delete` after
|
||||
the send succeeds. ``ttl`` is forced to 0 when the adapter
|
||||
doesn't override :meth:`delete_message` so non-supporting
|
||||
platforms silently degrade to normal sends.
|
||||
"""
|
||||
if isinstance(response, EphemeralReply):
|
||||
ttl = response.ttl_seconds
|
||||
if ttl is None:
|
||||
try:
|
||||
ttl = int(self._get_ephemeral_system_ttl_default())
|
||||
except Exception:
|
||||
ttl = 0
|
||||
if ttl and ttl > 0 and type(self).delete_message is BasePlatformAdapter.delete_message:
|
||||
ttl = 0
|
||||
return response.text, int(ttl or 0)
|
||||
return response, 0
|
||||
|
||||
async def _send_with_retry(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -2343,20 +2489,45 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
try:
|
||||
response = await self._message_handler(event)
|
||||
# Old adapter task (if any) is cancelled AFTER the runner has
|
||||
# fully handled the command — keeps ordering deterministic.
|
||||
_text, _eph_ttl = self._unwrap_ephemeral(response)
|
||||
# Send the response BEFORE cancelling the old task so the send
|
||||
# cannot be affected by task-cancellation side effects (race
|
||||
# condition fix — issue #18912). Previously the send happened
|
||||
# after cancel_session_processing, which could silently drop the
|
||||
# "/new" confirmation when an agent was actively running.
|
||||
if _text:
|
||||
logger.info(
|
||||
"[%s] Sending command '/%s' response (%d chars) to %s",
|
||||
self.name,
|
||||
cmd,
|
||||
len(_text),
|
||||
event.source.chat_id,
|
||||
)
|
||||
_r = await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=_text,
|
||||
reply_to=(
|
||||
event.reply_to_message_id
|
||||
if event.source.platform == Platform.FEISHU
|
||||
and event.source.thread_id
|
||||
and event.reply_to_message_id
|
||||
else event.message_id
|
||||
),
|
||||
metadata=thread_meta,
|
||||
)
|
||||
if _eph_ttl > 0 and _r.success and _r.message_id:
|
||||
self._schedule_ephemeral_delete(
|
||||
chat_id=event.source.chat_id,
|
||||
message_id=_r.message_id,
|
||||
ttl_seconds=_eph_ttl,
|
||||
)
|
||||
# Old adapter task (if any) is cancelled AFTER the response has
|
||||
# been sent — keeps ordering deterministic and avoids the race.
|
||||
await self.cancel_session_processing(
|
||||
session_key,
|
||||
release_guard=False,
|
||||
discard_pending=False,
|
||||
)
|
||||
if response:
|
||||
await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=response,
|
||||
reply_to=event.message_id,
|
||||
metadata=thread_meta,
|
||||
)
|
||||
except Exception:
|
||||
# On failure, restore the original guard if one still exists so
|
||||
# we don't leave the session in a half-reset state.
|
||||
@@ -2436,13 +2607,26 @@ class BasePlatformAdapter(ABC):
|
||||
try:
|
||||
_thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
response = await self._message_handler(event)
|
||||
if response:
|
||||
await self._send_with_retry(
|
||||
_text, _eph_ttl = self._unwrap_ephemeral(response)
|
||||
if _text:
|
||||
_r = await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=response,
|
||||
reply_to=event.message_id,
|
||||
content=_text,
|
||||
reply_to=(
|
||||
event.reply_to_message_id
|
||||
if event.source.platform == Platform.FEISHU
|
||||
and event.source.thread_id
|
||||
and event.reply_to_message_id
|
||||
else event.message_id
|
||||
),
|
||||
metadata=_thread_meta,
|
||||
)
|
||||
if _eph_ttl > 0 and _r.success and _r.message_id:
|
||||
self._schedule_ephemeral_delete(
|
||||
chat_id=event.source.chat_id,
|
||||
message_id=_r.message_id,
|
||||
ttl_seconds=_eph_ttl,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
|
||||
return
|
||||
@@ -2491,10 +2675,18 @@ class BasePlatformAdapter(ABC):
|
||||
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
|
||||
if mode == "off":
|
||||
return 0.0
|
||||
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
|
||||
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
|
||||
if mode == "natural":
|
||||
min_ms, max_ms = 800, 2500
|
||||
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
|
||||
# custom mode — tolerate malformed env vars instead of crashing.
|
||||
try:
|
||||
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
|
||||
except (TypeError, ValueError):
|
||||
min_ms = 800
|
||||
try:
|
||||
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
|
||||
except (TypeError, ValueError):
|
||||
max_ms = 2500
|
||||
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
|
||||
|
||||
async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
|
||||
@@ -2516,7 +2708,6 @@ class BasePlatformAdapter(ABC):
|
||||
# Fall back to a new Event only if the entry was removed externally.
|
||||
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
|
||||
self._active_sessions[session_key] = interrupt_event
|
||||
callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
|
||||
|
||||
# Start continuous typing indicator (refreshes every 2 seconds)
|
||||
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
@@ -2549,7 +2740,16 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
# Call the handler (this can take a while with tool calls)
|
||||
response = await self._message_handler(event)
|
||||
|
||||
|
||||
# Slash-command handlers may return an EphemeralReply sentinel to
|
||||
# request that their reply message auto-delete after a TTL (used
|
||||
# for system notices like "✨ New session started!" that the user
|
||||
# doesn't need to keep in the thread). Unwrap here so all the
|
||||
# downstream extract_media / text-processing logic sees a plain
|
||||
# string, and remember the TTL + platform capability so the
|
||||
# post-send block can schedule the deletion.
|
||||
response, _ephemeral_ttl = self._unwrap_ephemeral(response)
|
||||
|
||||
# Send response if any. A None/empty response is normal when
|
||||
# streaming already delivered the text (already_sent=True) or
|
||||
# when the message was queued behind an active agent. Log at
|
||||
@@ -2630,14 +2830,34 @@ class BasePlatformAdapter(ABC):
|
||||
# Send the text portion
|
||||
if text_content:
|
||||
logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
|
||||
_reply_anchor = (
|
||||
event.reply_to_message_id
|
||||
if event.source.platform == Platform.FEISHU and event.source.thread_id and event.reply_to_message_id
|
||||
else event.message_id
|
||||
)
|
||||
result = await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=text_content,
|
||||
reply_to=event.message_id,
|
||||
reply_to=_reply_anchor,
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
_record_delivery(result)
|
||||
|
||||
# Schedule auto-deletion of system-notice replies.
|
||||
# Detached so the handler returns immediately; errors
|
||||
# (permission denied, message too old) are swallowed.
|
||||
if (
|
||||
_ephemeral_ttl
|
||||
and _ephemeral_ttl > 0
|
||||
and result.success
|
||||
and result.message_id
|
||||
):
|
||||
self._schedule_ephemeral_delete(
|
||||
chat_id=event.source.chat_id,
|
||||
message_id=result.message_id,
|
||||
ttl_seconds=_ephemeral_ttl,
|
||||
)
|
||||
|
||||
# Human-like pacing delay between text and media
|
||||
human_delay = self._get_human_delay()
|
||||
|
||||
@@ -2815,7 +3035,20 @@ class BasePlatformAdapter(ABC):
|
||||
finally:
|
||||
# Fire any one-shot post-delivery callback registered for this
|
||||
# session (e.g. deferred background-review notifications).
|
||||
_callback_generation = callback_generation
|
||||
#
|
||||
# Snapshot the callback generation HERE (after the agent has run),
|
||||
# not at the top of this task. _hermes_run_generation is set on
|
||||
# the interrupt event by GatewayRunner._bind_adapter_run_generation
|
||||
# during _handle_message_with_agent — which happens DURING the
|
||||
# self._message_handler(event) await above. Snapshotting earlier
|
||||
# always captured None, which bypassed the generation-ownership
|
||||
# check in pop_post_delivery_callback and let stale runs fire a
|
||||
# fresher run's callbacks.
|
||||
_callback_generation = getattr(
|
||||
interrupt_event,
|
||||
"_hermes_run_generation",
|
||||
None,
|
||||
)
|
||||
if hasattr(self, "pop_post_delivery_callback"):
|
||||
_post_cb = self.pop_post_delivery_callback(
|
||||
session_key,
|
||||
|
||||
@@ -162,7 +162,9 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
from aiohttp import web
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
|
||||
try:
|
||||
await self._api_get("/api/v1/ping")
|
||||
info = await self._api_get("/api/v1/server/info")
|
||||
|
||||
@@ -228,7 +228,11 @@ class DingTalkAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
try:
|
||||
self._http_client = httpx.AsyncClient(timeout=30.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(
|
||||
timeout=30.0, limits=platform_httpx_limits(),
|
||||
)
|
||||
|
||||
credential = dingtalk_stream.Credential(
|
||||
self._client_id, self._client_secret
|
||||
|
||||
+535
-52
@@ -497,6 +497,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
self._ready_event = asyncio.Event()
|
||||
self._allowed_user_ids: set = set() # For button approval authorization
|
||||
self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering
|
||||
self.gateway_runner = None # Set by gateway/run.py for cross-platform delivery
|
||||
# Voice channel state (per-guild)
|
||||
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
|
||||
self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave
|
||||
@@ -613,6 +614,21 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# so LLM output or echoed user content can't ping the whole
|
||||
# server; override per DISCORD_ALLOW_MENTION_* env vars or the
|
||||
# discord.allow_mentions.* block in config.yaml.
|
||||
|
||||
# Close any existing client to prevent zombie websocket connections
|
||||
# on reconnect (see #18187). Without this, the old client remains
|
||||
# connected to Discord gateway and both fire on_message, causing
|
||||
# double responses.
|
||||
if self._client is not None:
|
||||
try:
|
||||
if not self._client.is_closed():
|
||||
await self._client.close()
|
||||
except Exception:
|
||||
logger.debug("[%s] Failed to close previous Discord client", self.name)
|
||||
finally:
|
||||
self._client = None
|
||||
self._ready_event.clear()
|
||||
|
||||
self._client = commands.Bot(
|
||||
command_prefix="!", # Not really used, we handle raw messages
|
||||
intents=intents,
|
||||
@@ -704,11 +720,22 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return
|
||||
# If humans are mentioned but we're not → not for us
|
||||
# (preserves old DISCORD_IGNORE_NO_MENTION=true behavior)
|
||||
# EXCEPT in free-response channels where the bot should
|
||||
# answer regardless of who is mentioned.
|
||||
_ignore_no_mention = os.getenv(
|
||||
"DISCORD_IGNORE_NO_MENTION", "true"
|
||||
).lower() in ("true", "1", "yes")
|
||||
if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned:
|
||||
return
|
||||
_channel_id = str(message.channel.id)
|
||||
_parent_id = None
|
||||
if hasattr(message.channel, "parent_id") and message.channel.parent_id:
|
||||
_parent_id = str(message.channel.parent_id)
|
||||
_free_channels = adapter_self._discord_free_response_channels()
|
||||
_channel_ids = {_channel_id}
|
||||
if _parent_id:
|
||||
_channel_ids.add(_parent_id)
|
||||
if "*" not in _free_channels and not (_channel_ids & _free_channels):
|
||||
return
|
||||
|
||||
await self._handle_message(message)
|
||||
|
||||
@@ -1914,6 +1941,225 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return True
|
||||
return False
|
||||
|
||||
# ── Slash command authorization ─────────────────────────────────────
|
||||
# Slash commands (``_run_simple_slash`` and ``_handle_thread_create_slash``)
|
||||
# are a separate Discord interaction surface from regular messages and
|
||||
# historically ran with NO authorization check — bypassing every gate
|
||||
# ``on_message`` enforces (DISCORD_ALLOWED_USERS, DISCORD_ALLOWED_ROLES,
|
||||
# DISCORD_ALLOWED_CHANNELS, DISCORD_IGNORED_CHANNELS). Any guild member
|
||||
# could invoke ``/background``, ``/restart``, ``/sethome``, etc. as the
|
||||
# operator. ``_check_slash_authorization`` mirrors the on_message gates
|
||||
# one-for-one so the slash surface honors the same trust boundary.
|
||||
#
|
||||
# By design, this is a no-op for deployments with no allowlist env vars
|
||||
# set — ``_is_allowed_user`` returns True and the channel checks early-out
|
||||
# — preserving the existing "single-tenant, all guild members trusted"
|
||||
# default. Deployments that DO set any DISCORD_ALLOWED_* var get slash
|
||||
# parity with on_message.
|
||||
|
||||
def _evaluate_slash_authorization(
|
||||
self, interaction: "discord.Interaction",
|
||||
) -> Tuple[bool, Optional[str]]:
|
||||
"""Evaluate slash authorization without producing any response.
|
||||
|
||||
Returns ``(allowed, reason)``. ``reason`` is populated only when
|
||||
``allowed`` is False. This is the shared core used by both the
|
||||
responding wrapper (``_check_slash_authorization``) and side-effect-
|
||||
free callers like the ``/skill`` autocomplete callback, which must
|
||||
return an empty list for unauthorized users instead of leaking an
|
||||
ephemeral rejection per-keystroke.
|
||||
|
||||
Fail-closed semantics for malformed payloads: when an allowlist is
|
||||
configured but the interaction is missing the data needed to
|
||||
evaluate it (no channel id with channel policy active, no user
|
||||
with user/role policy active), the gate REJECTS rather than
|
||||
falling through. Without these guards a guild interaction that
|
||||
happens to deserialize without a channel id would silently bypass
|
||||
``DISCORD_ALLOWED_CHANNELS`` and a payload missing ``user`` would
|
||||
raise ``AttributeError`` in the user check below, surfacing as
|
||||
an opaque interaction failure rather than a clean rejection.
|
||||
"""
|
||||
chan_obj = getattr(interaction, "channel", None)
|
||||
in_dm = isinstance(chan_obj, discord.DMChannel) if chan_obj is not None else False
|
||||
|
||||
# ── Channel scope (mirrors on_message lines 3374-3388) ──
|
||||
# DMs aren't channel-gated — DMs follow on_message's DM lockdown
|
||||
# path which has its own user-allowlist enforcement.
|
||||
if not in_dm:
|
||||
chan_id_raw = getattr(interaction, "channel_id", None) or getattr(
|
||||
chan_obj, "id", None,
|
||||
)
|
||||
channel_ids: set = set()
|
||||
if chan_id_raw is not None:
|
||||
channel_ids.add(str(chan_id_raw))
|
||||
# Mirror on_message: also test the parent channel for threads
|
||||
# so per-channel allow/deny lists work consistently.
|
||||
if isinstance(chan_obj, discord.Thread):
|
||||
parent_id = self._get_parent_channel_id(chan_obj)
|
||||
if parent_id:
|
||||
channel_ids.add(str(parent_id))
|
||||
|
||||
allowed_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
|
||||
if allowed_raw:
|
||||
allowed = {c.strip() for c in allowed_raw.split(",") if c.strip()}
|
||||
if "*" not in allowed:
|
||||
if not channel_ids:
|
||||
# Channel policy is configured but the interaction
|
||||
# has no resolvable channel id. Fail closed.
|
||||
return (
|
||||
False,
|
||||
"channel id missing with DISCORD_ALLOWED_CHANNELS configured",
|
||||
)
|
||||
if not (channel_ids & allowed):
|
||||
return (False, "channel not in DISCORD_ALLOWED_CHANNELS")
|
||||
|
||||
# Ignored beats allowed: even when a thread's parent channel
|
||||
# is on the allowlist, an explicit DISCORD_IGNORED_CHANNELS
|
||||
# entry on the thread or its parent rejects the interaction.
|
||||
ignored_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
if ignored_raw and channel_ids:
|
||||
ignored = {c.strip() for c in ignored_raw.split(",") if c.strip()}
|
||||
if "*" in ignored or (channel_ids & ignored):
|
||||
return (False, "channel in DISCORD_IGNORED_CHANNELS")
|
||||
|
||||
# ── User / role allowlist (mirrors on_message line 681) ──
|
||||
user = getattr(interaction, "user", None)
|
||||
allowed_users = getattr(self, "_allowed_user_ids", set()) or set()
|
||||
allowed_roles = getattr(self, "_allowed_role_ids", set()) or set()
|
||||
if user is None or getattr(user, "id", None) is None:
|
||||
# No identifiable user. With any user/role allowlist
|
||||
# configured, fail closed rather than raise AttributeError
|
||||
# on ``interaction.user.id`` below. With no allowlist this
|
||||
# is the existing "no allowlist = everyone" backwards-compat.
|
||||
if allowed_users or allowed_roles:
|
||||
return (False, "missing interaction.user with allowlist configured")
|
||||
return (True, None)
|
||||
|
||||
user_id = str(user.id)
|
||||
if not self._is_allowed_user(user_id, author=user):
|
||||
return (
|
||||
False,
|
||||
"user not in DISCORD_ALLOWED_USERS / DISCORD_ALLOWED_ROLES",
|
||||
)
|
||||
|
||||
return (True, None)
|
||||
|
||||
async def _check_slash_authorization(
|
||||
self, interaction: "discord.Interaction", command_text: str,
|
||||
) -> bool:
|
||||
"""Mirror on_message's user/role/channel gates onto a slash invocation.
|
||||
|
||||
Returns True to proceed. Returns False *after* sending an ephemeral
|
||||
rejection, logging a warning, and scheduling a cross-platform admin
|
||||
alert — the caller must stop on False (the interaction has already
|
||||
been responded to).
|
||||
"""
|
||||
allowed, reason = self._evaluate_slash_authorization(interaction)
|
||||
if allowed:
|
||||
return True
|
||||
return await self._reject_slash(
|
||||
interaction, command_text, reason=reason or "unauthorized",
|
||||
)
|
||||
|
||||
async def _reject_slash(
|
||||
self, interaction: "discord.Interaction", command_text: str, *, reason: str,
|
||||
) -> bool:
|
||||
"""Send ephemeral reject + log warning + schedule admin alert. Returns False.
|
||||
|
||||
Tolerates a missing ``interaction.user`` -- the fail-closed branch
|
||||
in ``_evaluate_slash_authorization`` deliberately routes here for
|
||||
malformed payloads (no user) when an allowlist is configured, and
|
||||
``str(interaction.user.id)`` would raise AttributeError before the
|
||||
ephemeral rejection could be sent.
|
||||
"""
|
||||
user = getattr(interaction, "user", None)
|
||||
if user is not None:
|
||||
user_id = str(getattr(user, "id", "?"))
|
||||
user_name = getattr(user, "name", "?")
|
||||
else:
|
||||
user_id = "?"
|
||||
user_name = "?"
|
||||
chan_id = getattr(interaction, "channel_id", None) or getattr(
|
||||
getattr(interaction, "channel", None), "id", None,
|
||||
)
|
||||
guild_id = getattr(interaction, "guild_id", None)
|
||||
|
||||
logger.warning(
|
||||
"[Discord] Unauthorized slash attempt: user=%s id=%s channel=%s "
|
||||
"guild=%s cmd=%r reason=%r",
|
||||
user_name, user_id, chan_id, guild_id, command_text, reason,
|
||||
)
|
||||
|
||||
try:
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized to use this command.",
|
||||
ephemeral=True,
|
||||
)
|
||||
except Exception as e:
|
||||
# Interaction may already be responded to (e.g. caller deferred
|
||||
# before the auth check, or Discord retried). Best-effort only.
|
||||
logger.debug("[Discord] Could not send unauthorized ephemeral: %s", e)
|
||||
|
||||
# Fire-and-forget: don't block the interaction handler on Telegram I/O.
|
||||
try:
|
||||
asyncio.create_task(self._notify_unauthorized_slash(
|
||||
user_name, user_id, chan_id, guild_id, command_text, reason,
|
||||
))
|
||||
except Exception as e:
|
||||
logger.debug("[Discord] Could not schedule admin notify task: %s", e)
|
||||
|
||||
return False
|
||||
|
||||
async def _notify_unauthorized_slash(
|
||||
self, user_name: str, user_id: str, chan_id, guild_id,
|
||||
command_text: str, reason: str,
|
||||
) -> None:
|
||||
"""Best-effort cross-platform alert to the gateway operator.
|
||||
|
||||
Tries TELEGRAM first (most operators set TELEGRAM_HOME_CHANNEL),
|
||||
then SLACK. Silently no-ops if no other platform is configured
|
||||
with a home channel.
|
||||
|
||||
A soft send failure -- adapter.send() returning a result with
|
||||
``success=False`` rather than raising -- continues the fallback
|
||||
chain. Treating a SendResult(success=False) as delivered would
|
||||
mean a Telegram outage that the adapter politely surfaces (e.g.
|
||||
rate-limit, auth failure) silently swallows the alert without
|
||||
attempting Slack. Hard exceptions still take the same path via
|
||||
the except branch below.
|
||||
"""
|
||||
runner = getattr(self, "gateway_runner", None)
|
||||
if not runner:
|
||||
return
|
||||
for target in (Platform.TELEGRAM, Platform.SLACK):
|
||||
try:
|
||||
adapter = runner.adapters.get(target)
|
||||
if not adapter:
|
||||
continue
|
||||
home = runner.config.get_home_channel(target)
|
||||
if not home or not getattr(home, "chat_id", None):
|
||||
continue
|
||||
msg = (
|
||||
"⚠️ Unauthorized Discord slash attempt\n"
|
||||
f"User: {user_name} ({user_id})\n"
|
||||
f"Channel: {chan_id} (guild {guild_id})\n"
|
||||
f"Command: {command_text}\n"
|
||||
f"Reason: {reason}"
|
||||
)
|
||||
result = await adapter.send(str(home.chat_id), msg)
|
||||
# Only return on confirmed delivery. SendResult(success=False)
|
||||
# -> continue to the next platform.
|
||||
if getattr(result, "success", None) is False:
|
||||
logger.debug(
|
||||
"[Discord] Admin notify via %s returned success=False"
|
||||
" (error=%r); falling through",
|
||||
target, getattr(result, "error", None),
|
||||
)
|
||||
continue
|
||||
return
|
||||
except Exception as e:
|
||||
logger.debug("[Discord] Admin notify via %s failed: %s", target, e)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -2301,6 +2547,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
pass # logging must never block command dispatch
|
||||
|
||||
# Auth gate — must run before defer() so an ephemeral rejection can
|
||||
# be delivered on the still-unresponded interaction.
|
||||
if not await self._check_slash_authorization(interaction, command_text):
|
||||
return
|
||||
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, command_text)
|
||||
await self.handle_message(event)
|
||||
@@ -2403,9 +2654,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
await self._run_simple_slash(interaction, "/reload-skills")
|
||||
|
||||
@tree.command(name="voice", description="Toggle voice reply mode")
|
||||
@discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
|
||||
@discord.app_commands.describe(mode="Voice mode: join, channel, leave, on, tts, off, or status")
|
||||
@discord.app_commands.choices(mode=[
|
||||
discord.app_commands.Choice(name="channel — join your voice channel", value="channel"),
|
||||
# `join` and `channel` both route to _handle_voice_channel_join in
|
||||
# gateway/run.py — expose both in the slash UI so autocomplete
|
||||
# matches what the docs advertise and what the runner accepts when
|
||||
# the command is typed as plain text.
|
||||
discord.app_commands.Choice(name="join — join your voice channel", value="join"),
|
||||
discord.app_commands.Choice(name="channel — join your voice channel (alias)", value="channel"),
|
||||
discord.app_commands.Choice(name="leave — leave voice channel", value="leave"),
|
||||
discord.app_commands.Choice(name="on — voice reply to voice messages", value="on"),
|
||||
discord.app_commands.Choice(name="tts — voice reply to all messages", value="tts"),
|
||||
@@ -2445,7 +2701,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
message: str = "",
|
||||
auto_archive_duration: int = 1440,
|
||||
):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
# defer() is performed inside the handler *after* the auth gate
|
||||
# so a rejected invoker can receive an ephemeral rejection.
|
||||
await self._handle_thread_create_slash(interaction, name, message, auto_archive_duration)
|
||||
|
||||
@tree.command(name="queue", description="Queue a prompt for the next turn (doesn't interrupt)")
|
||||
@@ -2566,6 +2823,54 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# supporting up to 25 categories × 25 skills = 625 skills.
|
||||
self._register_skill_group(tree)
|
||||
|
||||
# Optional defense-in-depth: hide every slash command from non-admin
|
||||
# guild members in Discord's slash picker. Server-side authorization
|
||||
# (``_check_slash_authorization``) is the actual gate; this is purely
|
||||
# UX so users don't see commands they can't invoke. Off by default
|
||||
# to preserve the slash UX for deployments that intentionally allow
|
||||
# everyone in the guild.
|
||||
if os.getenv("DISCORD_HIDE_SLASH_COMMANDS", "false").strip().lower() in (
|
||||
"true", "1", "yes", "on",
|
||||
):
|
||||
self._apply_owner_only_visibility(tree)
|
||||
|
||||
def _apply_owner_only_visibility(self, tree) -> None:
|
||||
"""Set default_member_permissions=0 on every registered slash command.
|
||||
|
||||
Discord interprets ``Permissions(0)`` as "requires no permissions",
|
||||
which paradoxically means the command is hidden from every guild
|
||||
member except those with the Administrator permission. Server admins
|
||||
can re-grant per user/role via Server Settings → Integrations →
|
||||
<bot> → Permissions.
|
||||
|
||||
Authoritative gate is ``_check_slash_authorization`` on every
|
||||
invocation, which catches stale clients, role grants made by
|
||||
mistake, and direct API calls bypassing Discord's UI hide.
|
||||
"""
|
||||
try:
|
||||
no_perms = discord.Permissions(0)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[Discord] _apply_owner_only_visibility: cannot build Permissions(0): %s",
|
||||
e,
|
||||
)
|
||||
return
|
||||
applied = 0
|
||||
for cmd in tree.get_commands():
|
||||
try:
|
||||
cmd.default_permissions = no_perms
|
||||
applied += 1
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"[Discord] Could not set default_permissions on %r: %s",
|
||||
getattr(cmd, "name", "?"), e,
|
||||
)
|
||||
logger.info(
|
||||
"[Discord] Hid %d slash command(s) from non-admin guild members "
|
||||
"(opt-in defense in depth via DISCORD_HIDE_SLASH_COMMANDS).",
|
||||
applied,
|
||||
)
|
||||
|
||||
def _register_skill_group(self, tree) -> None:
|
||||
"""Register a single ``/skill`` command with autocomplete on the name.
|
||||
|
||||
@@ -2584,40 +2889,32 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
hidden skills. The slash picker also becomes more discoverable —
|
||||
Discord live-filters by the user's typed prefix against both the
|
||||
skill name and its description.
|
||||
|
||||
The entries list and lookup dict are stored on ``self`` rather
|
||||
than captured in closure variables so :meth:`refresh_skill_group`
|
||||
can repopulate them when the user runs ``/reload-skills`` without
|
||||
needing to touch the Discord slash-command tree or trigger a
|
||||
``tree.sync()`` call.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.commands import discord_skill_commands_by_category
|
||||
|
||||
existing_names = set()
|
||||
try:
|
||||
existing_names = {cmd.name for cmd in tree.get_commands()}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Reuse the existing collector for consistent filtering
|
||||
# (per-platform disabled, hub-excluded, name clamping), then
|
||||
# flatten — the category grouping was only useful for the
|
||||
# nested layout.
|
||||
categories, uncategorized, hidden = discord_skill_commands_by_category(
|
||||
reserved_names=existing_names,
|
||||
)
|
||||
entries: list[tuple[str, str, str]] = list(uncategorized)
|
||||
for cat_skills in categories.values():
|
||||
entries.extend(cat_skills)
|
||||
# Populate the instance-level entries/lookup so the
|
||||
# autocomplete + handler callbacks below always read the
|
||||
# freshest state. refresh_skill_group() re-runs the same
|
||||
# collector and mutates these two attributes in place.
|
||||
self._skill_entries: list[tuple[str, str, str]] = []
|
||||
self._skill_lookup: dict[str, tuple[str, str]] = {}
|
||||
self._skill_group_reserved_names: set[str] = set(existing_names)
|
||||
self._refresh_skill_catalog_state()
|
||||
|
||||
if not entries:
|
||||
if not self._skill_entries:
|
||||
return
|
||||
|
||||
# Stable alphabetical order so the autocomplete suggestion
|
||||
# list is predictable across restarts.
|
||||
entries.sort(key=lambda t: t[0])
|
||||
|
||||
# name -> (description, cmd_key) — used by both the autocomplete
|
||||
# callback and the handler for O(1) dispatch.
|
||||
skill_lookup: dict[str, tuple[str, str]] = {
|
||||
n: (d, k) for n, d, k in entries
|
||||
}
|
||||
|
||||
async def _autocomplete_name(
|
||||
interaction: "discord.Interaction", current: str,
|
||||
) -> list:
|
||||
@@ -2627,10 +2924,29 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
"/skill pdf" surfaces skills whose description mentions
|
||||
PDFs even if the name doesn't. Discord caps this list at
|
||||
25 entries per query.
|
||||
|
||||
Authorization: a quiet pre-check evaluates the slash
|
||||
allowlists and returns ``[]`` for unauthorized users so
|
||||
the installed skill catalog is not leaked to anyone who
|
||||
can see the command in the picker. Returning a generic
|
||||
empty list here is intentional — sending a per-keystroke
|
||||
ephemeral rejection would produce a barrage of error
|
||||
popups during typing.
|
||||
|
||||
Reads ``self._skill_entries`` so a ``/reload-skills`` run
|
||||
since process start shows up on the very next keystroke.
|
||||
"""
|
||||
try:
|
||||
allowed, _reason = self._evaluate_slash_authorization(interaction)
|
||||
except Exception:
|
||||
# Defensive: never raise from autocomplete. Fail
|
||||
# closed by returning an empty suggestion list.
|
||||
return []
|
||||
if not allowed:
|
||||
return []
|
||||
q = (current or "").strip().lower()
|
||||
choices: list = []
|
||||
for name, desc, _key in entries:
|
||||
for name, desc, _key in self._skill_entries:
|
||||
if not q or q in name.lower() or (desc and q in desc.lower()):
|
||||
if desc:
|
||||
label = f"{name} — {desc}"
|
||||
@@ -2654,7 +2970,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def _skill_handler(
|
||||
interaction: "discord.Interaction", name: str, args: str = "",
|
||||
):
|
||||
entry = skill_lookup.get(name)
|
||||
# Authorize BEFORE any skill lookup so that known and
|
||||
# unknown skill names produce identical rejections for
|
||||
# unauthorized users (no probing the installed catalog
|
||||
# via "Unknown skill: <name>" responses).
|
||||
if not await self._check_slash_authorization(interaction, "/skill"):
|
||||
return
|
||||
entry = self._skill_lookup.get(name)
|
||||
if not entry:
|
||||
await interaction.response.send_message(
|
||||
f"Unknown skill: `{name}`. Start typing for "
|
||||
@@ -2676,16 +2998,74 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
logger.info(
|
||||
"[%s] Registered /skill command with %d skill(s) via autocomplete",
|
||||
self.name, len(entries),
|
||||
self.name, len(self._skill_entries),
|
||||
)
|
||||
if hidden:
|
||||
if self._skill_group_hidden_count:
|
||||
logger.info(
|
||||
"[%s] %d skill(s) filtered out of /skill (name clamp / reserved)",
|
||||
self.name, hidden,
|
||||
self.name, self._skill_group_hidden_count,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Failed to register /skill command: %s", self.name, exc)
|
||||
|
||||
def _refresh_skill_catalog_state(self) -> None:
|
||||
"""Re-scan disk for skills and repopulate ``self._skill_entries``.
|
||||
|
||||
Called once from :meth:`_register_skill_group` at startup and
|
||||
again from :meth:`refresh_skill_group` whenever the user runs
|
||||
``/reload-skills``. No Discord API calls are made — autocomplete
|
||||
and the handler both read from these instance attributes
|
||||
directly, so an in-place mutation is sufficient.
|
||||
"""
|
||||
from hermes_cli.commands import discord_skill_commands_by_category
|
||||
|
||||
reserved = getattr(self, "_skill_group_reserved_names", set())
|
||||
categories, uncategorized, hidden = discord_skill_commands_by_category(
|
||||
reserved_names=set(reserved),
|
||||
)
|
||||
entries: list[tuple[str, str, str]] = list(uncategorized)
|
||||
for cat_skills in categories.values():
|
||||
entries.extend(cat_skills)
|
||||
# Stable alphabetical order so the autocomplete suggestion
|
||||
# list is predictable across restarts.
|
||||
entries.sort(key=lambda t: t[0])
|
||||
|
||||
self._skill_entries = entries
|
||||
self._skill_lookup = {n: (d, k) for n, d, k in entries}
|
||||
self._skill_group_hidden_count = hidden
|
||||
|
||||
def refresh_skill_group(self) -> tuple[int, int]:
|
||||
"""Rescan skills and update the live ``/skill`` autocomplete state.
|
||||
|
||||
Invoked by :meth:`gateway.run.GatewayOrchestrator._handle_reload_skills_command`
|
||||
after :func:`agent.skill_commands.reload_skills` has refreshed
|
||||
the in-process skill-command registry. Without this call, the
|
||||
``/skill`` autocomplete dropdown keeps showing the list captured
|
||||
at process start — new skills stay invisible and deleted skills
|
||||
return an "Unknown skill" error when clicked.
|
||||
|
||||
Because autocomplete options are fetched dynamically by Discord,
|
||||
we only need to mutate the entries/lookup attributes read by the
|
||||
callbacks — no ``tree.sync()`` is required.
|
||||
|
||||
Returns ``(new_count, hidden_count)``.
|
||||
"""
|
||||
try:
|
||||
self._refresh_skill_catalog_state()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"[%s] Failed to refresh /skill autocomplete after reload: %s",
|
||||
self.name, exc,
|
||||
)
|
||||
return (len(getattr(self, "_skill_entries", [])), 0)
|
||||
logger.info(
|
||||
"[%s] Refreshed /skill autocomplete: %d skill(s) available (%d filtered)",
|
||||
self.name,
|
||||
len(self._skill_entries),
|
||||
self._skill_group_hidden_count,
|
||||
)
|
||||
return (len(self._skill_entries), self._skill_group_hidden_count)
|
||||
|
||||
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Discord slash command interaction."""
|
||||
is_dm = isinstance(interaction.channel, discord.DMChannel)
|
||||
@@ -2743,6 +3123,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
auto_archive_duration: int = 1440,
|
||||
) -> None:
|
||||
"""Create a Discord thread from a slash command and start a session in it."""
|
||||
if not await self._check_slash_authorization(interaction, "/thread"):
|
||||
return
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
result = await self._create_thread(
|
||||
interaction,
|
||||
name=name,
|
||||
@@ -2851,8 +3234,15 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return {part.strip() for part in raw.split(",") if part.strip()}
|
||||
# Coerce non-list scalars (str/int/float) to str before splitting.
|
||||
# YAML parses a bare numeric value such as
|
||||
# `free_response_channels: 1491973769726791812` as int, which was
|
||||
# previously falling through the isinstance(str) branch and silently
|
||||
# returning an empty set. str() here accepts whatever scalar the YAML
|
||||
# loader hands us without changing existing string/CSV semantics.
|
||||
s = str(raw).strip() if raw is not None else ""
|
||||
if s:
|
||||
return {part.strip() for part in s.split(",") if part.strip()}
|
||||
return set()
|
||||
|
||||
def _thread_parent_channel(self, channel: Any) -> Any:
|
||||
@@ -3030,6 +3420,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
view = ExecApprovalView(
|
||||
session_key=session_key,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
@@ -3068,6 +3459,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
session_key=session_key,
|
||||
confirm_id=confirm_id,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
@@ -3078,6 +3470,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def send_update_prompt(
|
||||
self, chat_id: str, prompt: str, default: str = "",
|
||||
session_key: str = "",
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an interactive button-based update prompt (Yes / No).
|
||||
|
||||
@@ -3087,9 +3480,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
try:
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
target_id = metadata.get("thread_id") if metadata and metadata.get("thread_id") else chat_id
|
||||
channel = self._client.get_channel(int(target_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
channel = await self._client.fetch_channel(int(target_id))
|
||||
|
||||
default_hint = f" (default: {default})" if default else ""
|
||||
embed = discord.Embed(
|
||||
@@ -3100,6 +3494,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
view = UpdatePromptView(
|
||||
session_key=session_key,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
@@ -3157,6 +3552,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
session_key=session_key,
|
||||
on_model_selected=on_model_selected,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
@@ -3417,7 +3813,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not is_thread and not isinstance(message.channel, discord.DMChannel):
|
||||
no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "")
|
||||
no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
|
||||
skip_thread = bool(channel_ids & no_thread_channels) or is_free_channel
|
||||
skip_thread = bool(channel_ids & no_thread_channels)
|
||||
auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
|
||||
is_reply_message = getattr(message, "type", None) == discord.MessageType.reply
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
@@ -3712,6 +4108,72 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Discord UI Components (outside the adapter class)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _component_check_auth(
|
||||
interaction,
|
||||
allowed_user_ids: Optional[set],
|
||||
allowed_role_ids: Optional[set],
|
||||
) -> bool:
|
||||
"""Shared user-or-role OR semantics for component view button clicks.
|
||||
|
||||
Mirrors ``DiscordAdapter._is_allowed_user`` / the slash and on_message
|
||||
gates so every Discord interaction surface honors the same trust
|
||||
boundary. Component views (ExecApprovalView, SlashConfirmView,
|
||||
UpdatePromptView, ModelPickerView) used to receive only
|
||||
``allowed_user_ids``: in role-only deployments
|
||||
(DISCORD_ALLOWED_ROLES set, DISCORD_ALLOWED_USERS empty) the user
|
||||
set was empty and the legacy "no allowlist = allow everyone" branch
|
||||
let any guild member click the buttons -- approving exec commands,
|
||||
cancelling slash confirmations, switching the model.
|
||||
|
||||
Behavior:
|
||||
|
||||
- both allowlists empty -> allow (preserves existing no-allowlist
|
||||
deployments, no regression)
|
||||
- user is in user allowlist -> allow
|
||||
- role allowlist set + user has a role in it -> allow
|
||||
- role allowlist set + interaction.user has no resolvable
|
||||
``roles`` attribute (e.g. DM context with a role policy active)
|
||||
-> reject (fail closed)
|
||||
- otherwise -> reject
|
||||
"""
|
||||
user_set = allowed_user_ids or set()
|
||||
role_set = allowed_role_ids or set()
|
||||
has_users = bool(user_set)
|
||||
has_roles = bool(role_set)
|
||||
if not has_users and not has_roles:
|
||||
return True
|
||||
|
||||
user = getattr(interaction, "user", None)
|
||||
if user is None:
|
||||
return False
|
||||
|
||||
if has_users:
|
||||
try:
|
||||
uid = str(user.id)
|
||||
except AttributeError:
|
||||
uid = ""
|
||||
if uid and uid in user_set:
|
||||
return True
|
||||
|
||||
if has_roles:
|
||||
roles_attr = getattr(user, "roles", None)
|
||||
if roles_attr is None:
|
||||
# Role policy is configured but the interaction doesn't
|
||||
# carry role data (DM-context Member, raw User payload).
|
||||
# Fail closed: a user without a resolvable role list cannot
|
||||
# satisfy a role allowlist.
|
||||
return False
|
||||
try:
|
||||
user_role_ids = {getattr(r, "id", None) for r in roles_attr}
|
||||
except TypeError:
|
||||
return False
|
||||
if user_role_ids & role_set:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
if DISCORD_AVAILABLE:
|
||||
|
||||
class ExecApprovalView(discord.ui.View):
|
||||
@@ -3724,17 +4186,23 @@ if DISCORD_AVAILABLE:
|
||||
Only users in the allowed list can click. Times out after 5 minutes.
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, allowed_user_ids: set):
|
||||
def __init__(
|
||||
self,
|
||||
session_key: str,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=300) # 5-minute timeout
|
||||
self.session_key = session_key
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
"""Verify the user clicking is authorized."""
|
||||
if not self.allowed_user_ids:
|
||||
return True # No allowlist = anyone can approve
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, choice: str,
|
||||
@@ -3826,17 +4294,24 @@ if DISCORD_AVAILABLE:
|
||||
5 minutes (matches the gateway primitive's timeout).
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
|
||||
def __init__(
|
||||
self,
|
||||
session_key: str,
|
||||
confirm_id: str,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=300)
|
||||
self.session_key = session_key
|
||||
self.confirm_id = confirm_id
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, choice: str,
|
||||
@@ -3914,16 +4389,22 @@ if DISCORD_AVAILABLE:
|
||||
5-minute timeout on its side).
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, allowed_user_ids: set):
|
||||
def __init__(
|
||||
self,
|
||||
session_key: str,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=300)
|
||||
self.session_key = session_key
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
async def _respond(
|
||||
self, interaction: discord.Interaction, answer: str,
|
||||
@@ -4000,6 +4481,7 @@ if DISCORD_AVAILABLE:
|
||||
session_key: str,
|
||||
on_model_selected,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=120)
|
||||
self.providers = providers
|
||||
@@ -4008,15 +4490,16 @@ if DISCORD_AVAILABLE:
|
||||
self.session_key = session_key
|
||||
self.on_model_selected = on_model_selected
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
self._selected_provider: str = ""
|
||||
|
||||
self._build_provider_select()
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
def _build_provider_select(self):
|
||||
"""Build the provider dropdown menu."""
|
||||
|
||||
@@ -416,6 +416,18 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
logger.debug("[Email] Dropping automated sender at dispatch: %s", sender_addr)
|
||||
return
|
||||
|
||||
# Skip senders not in EMAIL_ALLOWED_USERS — prevents the adapter
|
||||
# from creating a MessageEvent (and thus thread context) for senders
|
||||
# that the gateway will never authorize. Without this early guard,
|
||||
# a race between dispatch and authorization can result in the adapter
|
||||
# sending a reply even though the handler returned None.
|
||||
allowed_raw = os.getenv("EMAIL_ALLOWED_USERS", "").strip()
|
||||
if allowed_raw:
|
||||
allowed = {addr.strip().lower() for addr in allowed_raw.split(",") if addr.strip()}
|
||||
if sender_addr.lower() not in allowed:
|
||||
logger.debug("[Email] Dropping non-allowlisted sender at dispatch: %s", sender_addr)
|
||||
return
|
||||
|
||||
subject = msg_data["subject"]
|
||||
body = msg_data["body"].strip()
|
||||
attachments = msg_data["attachments"]
|
||||
|
||||
+294
-92
@@ -64,7 +64,7 @@ from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
from typing import Any, Dict, List, Literal, Optional, Sequence
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import Request, urlopen
|
||||
@@ -141,6 +141,7 @@ from gateway.platforms.base import (
|
||||
)
|
||||
from gateway.status import acquire_scoped_lock, release_scoped_lock
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_json_write
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -152,6 +153,9 @@ _MARKDOWN_HINT_RE = re.compile(
|
||||
r"(^#{1,6}\s)|(^\s*[-*]\s)|(^\s*\d+\.\s)|(^\s*---+\s*$)|(```)|(`[^`\n]+`)|(\*\*[^*\n].+?\*\*)|(~~[^~\n].+?~~)|(<u>.+?</u>)|(\*[^*\n]+\*)|(\[[^\]]+\]\([^)]+\))|(^>\s)",
|
||||
re.MULTILINE,
|
||||
)
|
||||
# Detect markdown tables: a line starting with | followed by a separator line.
|
||||
# Feishu post-type 'md' elements do not render tables, so we force text mode.
|
||||
_MARKDOWN_TABLE_RE = re.compile(r"^\|.*\|\n\|[-|: ]+\|", re.MULTILINE)
|
||||
_MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
|
||||
_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$")
|
||||
_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$")
|
||||
@@ -387,6 +391,8 @@ class FeishuAdapterSettings:
|
||||
admins: frozenset[str] = frozenset()
|
||||
default_group_policy: str = ""
|
||||
group_rules: Dict[str, FeishuGroupRule] = field(default_factory=dict)
|
||||
allow_bots: str = "none" # "none" | "mentions" | "all"
|
||||
require_mention: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -396,6 +402,7 @@ class FeishuGroupRule:
|
||||
policy: str # "open" | "allowlist" | "blacklist" | "admin_only" | "disabled"
|
||||
allowlist: set[str] = field(default_factory=set)
|
||||
blacklist: set[str] = field(default_factory=set)
|
||||
require_mention: Optional[bool] = None # None = inherit global
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -405,6 +412,40 @@ class FeishuBatchState:
|
||||
counts: Dict[str, int] = field(default_factory=dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Admission: policy types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
RejectReason = Literal[
|
||||
"self_echo",
|
||||
"self_ids_unknown",
|
||||
"bots_disabled",
|
||||
"bot_not_mentioned",
|
||||
"group_policy_rejected",
|
||||
]
|
||||
|
||||
|
||||
def _is_bot_sender(sender: Any) -> bool:
|
||||
# receive_v1 docs say {user, bot}; accept "app" defensively.
|
||||
return getattr(sender, "sender_type", "") in ("bot", "app")
|
||||
|
||||
|
||||
def _sender_identity(sender: Any) -> frozenset:
|
||||
# Take any non-empty id variant — tenant sender_id_type decides which are populated.
|
||||
sid = getattr(sender, "sender_id", None)
|
||||
if sid is None:
|
||||
return frozenset()
|
||||
return frozenset(
|
||||
v for v in (
|
||||
getattr(sid, "open_id", None),
|
||||
getattr(sid, "user_id", None),
|
||||
getattr(sid, "union_id", None),
|
||||
)
|
||||
if v
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown rendering helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1377,10 +1418,16 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
for chat_id, rule_cfg in raw_group_rules.items():
|
||||
if not isinstance(rule_cfg, dict):
|
||||
continue
|
||||
# Only override when the key is explicitly set — missing vs false
|
||||
# must not collapse.
|
||||
per_chat_require_mention: Optional[bool] = None
|
||||
if "require_mention" in rule_cfg:
|
||||
per_chat_require_mention = _to_boolean(rule_cfg.get("require_mention"))
|
||||
group_rules[str(chat_id)] = FeishuGroupRule(
|
||||
policy=str(rule_cfg.get("policy", "open")).strip().lower(),
|
||||
allowlist=set(str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()),
|
||||
blacklist=set(str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()),
|
||||
require_mention=per_chat_require_mention,
|
||||
)
|
||||
|
||||
# Bot-level admins
|
||||
@@ -1390,6 +1437,16 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
# Default group policy (for groups not in group_rules)
|
||||
default_group_policy = str(extra.get("default_group_policy", "")).strip().lower()
|
||||
|
||||
# Env-only so adapter and gateway auth bypass share one source; yaml
|
||||
# feishu.allow_bots is bridged to this env var at config load.
|
||||
allow_bots = os.getenv("FEISHU_ALLOW_BOTS", "none").strip().lower()
|
||||
if allow_bots not in ("none", "mentions", "all"):
|
||||
logger.warning(
|
||||
"[Feishu] Unknown allow_bots=%r, falling back to 'none'. Valid: none, mentions, all.",
|
||||
allow_bots,
|
||||
)
|
||||
allow_bots = "none"
|
||||
|
||||
return FeishuAdapterSettings(
|
||||
app_id=str(extra.get("app_id") or os.getenv("FEISHU_APP_ID", "")).strip(),
|
||||
app_secret=str(extra.get("app_secret") or os.getenv("FEISHU_APP_SECRET", "")).strip(),
|
||||
@@ -1446,6 +1503,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
admins=admins,
|
||||
default_group_policy=default_group_policy,
|
||||
group_rules=group_rules,
|
||||
allow_bots=allow_bots,
|
||||
require_mention=_to_boolean(
|
||||
extra.get("require_mention", os.getenv("FEISHU_REQUIRE_MENTION", "true"))
|
||||
),
|
||||
)
|
||||
|
||||
def _apply_settings(self, settings: FeishuAdapterSettings) -> None:
|
||||
@@ -1476,6 +1537,8 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
self._ws_reconnect_interval = settings.ws_reconnect_interval
|
||||
self._ws_ping_interval = settings.ws_ping_interval
|
||||
self._ws_ping_timeout = settings.ws_ping_timeout
|
||||
self._allow_bots = settings.allow_bots
|
||||
self._require_mention = settings.require_mention
|
||||
|
||||
def _build_event_handler(self) -> Any:
|
||||
if EventDispatcherHandler is None:
|
||||
@@ -2189,30 +2252,28 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
event = getattr(data, "event", None)
|
||||
message = getattr(event, "message", None)
|
||||
sender = getattr(event, "sender", None)
|
||||
sender_id = getattr(sender, "sender_id", None)
|
||||
if not message or not sender_id:
|
||||
logger.debug("[Feishu] Dropping malformed inbound event: missing message or sender_id")
|
||||
if not message or not sender or not getattr(sender, "sender_id", None):
|
||||
logger.debug("[Feishu] Dropping malformed inbound event: missing message/sender")
|
||||
return
|
||||
|
||||
message_id = getattr(message, "message_id", None)
|
||||
if not message_id or self._is_duplicate(message_id):
|
||||
logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id)
|
||||
return
|
||||
if self._is_self_sent_bot_message(event):
|
||||
logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id)
|
||||
|
||||
reason = self._admit(sender, message)
|
||||
if reason is not None:
|
||||
logger.debug("[Feishu] dropping inbound event: %s", reason)
|
||||
return
|
||||
|
||||
chat_type = getattr(message, "chat_type", "p2p")
|
||||
chat_id = getattr(message, "chat_id", "") or ""
|
||||
if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id, chat_id):
|
||||
logger.debug("[Feishu] Dropping group message that failed mention/policy gate: %s", message_id)
|
||||
return
|
||||
await self._process_inbound_message(
|
||||
data=data,
|
||||
message=message,
|
||||
sender_id=sender_id,
|
||||
sender_id=getattr(sender, "sender_id", None),
|
||||
chat_type=chat_type,
|
||||
message_id=message_id,
|
||||
is_bot=_is_bot_sender(sender),
|
||||
)
|
||||
|
||||
def _on_message_read_event(self, data: P2ImMessageMessageReadV1) -> None:
|
||||
@@ -2389,10 +2450,11 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
msg = items[0] if items else None
|
||||
if not msg:
|
||||
return
|
||||
# GET im/v1/messages returns sender.id=app_id for bot messages —
|
||||
# peer bots and us share sender_type="app" but differ on app_id.
|
||||
sender = getattr(msg, "sender", None)
|
||||
sender_type = str(getattr(sender, "sender_type", "") or "").lower()
|
||||
if sender_type != "app":
|
||||
return # only route reactions on our own bot messages
|
||||
if str(getattr(sender, "id", "") or "") != self._app_id:
|
||||
return # only route reactions on this bot's own messages
|
||||
chat_id = str(getattr(msg, "chat_id", "") or "")
|
||||
chat_type_raw = str(getattr(msg, "chat_type", "p2p") or "p2p")
|
||||
if not chat_id:
|
||||
@@ -2679,6 +2741,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
sender_id: Any,
|
||||
chat_type: str,
|
||||
message_id: str,
|
||||
is_bot: bool = False,
|
||||
) -> None:
|
||||
text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
|
||||
|
||||
@@ -2697,34 +2760,45 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
if hint:
|
||||
text = f"{hint}\n\n{text}" if text else hint
|
||||
|
||||
thread_id = getattr(message, "thread_id", None) or getattr(message, "root_id", None) or None
|
||||
reply_to_message_id = (
|
||||
getattr(message, "parent_id", None)
|
||||
or getattr(message, "upper_message_id", None)
|
||||
or getattr(message, "root_id", None)
|
||||
or None
|
||||
)
|
||||
reply_to_text = await self._fetch_message_text(reply_to_message_id) if reply_to_message_id else None
|
||||
|
||||
sender_primary = (
|
||||
getattr(sender_id, "open_id", None)
|
||||
or getattr(sender_id, "user_id", None)
|
||||
or getattr(sender_id, "union_id", None)
|
||||
or "<unknown>"
|
||||
)
|
||||
logger.info(
|
||||
"[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s text=%r media=%d",
|
||||
"[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s sender=%s:%s text=%r media=%d",
|
||||
"dm" if chat_type == "p2p" else "group",
|
||||
message_id,
|
||||
inbound_type.value,
|
||||
getattr(message, "chat_id", "") or "",
|
||||
"bot" if is_bot else "user",
|
||||
sender_primary,
|
||||
text[:120],
|
||||
len(media_urls),
|
||||
)
|
||||
|
||||
chat_id = getattr(message, "chat_id", "") or ""
|
||||
chat_info = await self.get_chat_info(chat_id)
|
||||
sender_profile = await self._resolve_sender_profile(sender_id)
|
||||
sender_profile = await self._resolve_sender_profile(sender_id, is_bot=is_bot)
|
||||
source = self.build_source(
|
||||
chat_id=chat_id,
|
||||
chat_name=chat_info.get("name") or chat_id or "Feishu Chat",
|
||||
chat_type=self._resolve_source_chat_type(chat_info=chat_info, event_chat_type=chat_type),
|
||||
user_id=sender_profile["user_id"],
|
||||
user_name=sender_profile["user_name"],
|
||||
thread_id=getattr(message, "thread_id", None) or None,
|
||||
thread_id=thread_id,
|
||||
user_id_alt=sender_profile["user_id_alt"],
|
||||
is_bot=is_bot,
|
||||
)
|
||||
normalized = MessageEvent(
|
||||
text=text,
|
||||
@@ -2853,13 +2927,18 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
# Snapshot Content-Type and body while the client context is
|
||||
# still active so pooled connections fully release on exit.
|
||||
# See #18451.
|
||||
content_type_hdr = str(response.headers.get("Content-Type", ""))
|
||||
body = response.content
|
||||
filename = self._derive_remote_filename(
|
||||
file_url,
|
||||
content_type=str(response.headers.get("Content-Type", "")),
|
||||
content_type=content_type_hdr,
|
||||
default_name=preferred_name,
|
||||
default_ext=default_ext,
|
||||
)
|
||||
cached_path = cache_document_from_bytes(response.content, filename)
|
||||
cached_path = cache_document_from_bytes(body, filename)
|
||||
return cached_path, filename
|
||||
|
||||
@staticmethod
|
||||
@@ -3447,7 +3526,12 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
return "dm"
|
||||
return "group"
|
||||
|
||||
async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
|
||||
async def _resolve_sender_profile(
|
||||
self,
|
||||
sender_id: Any,
|
||||
*,
|
||||
is_bot: bool = False,
|
||||
) -> Dict[str, Optional[str]]:
|
||||
"""Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
|
||||
|
||||
Preference order for the primary ``user_id`` field:
|
||||
@@ -3464,7 +3548,11 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
union_id = getattr(sender_id, "union_id", None) or None
|
||||
# Prefer tenant-scoped user_id; fall back to app-scoped open_id.
|
||||
primary_id = user_id or open_id
|
||||
display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
|
||||
# bot/v3/bots/basic_batch only accepts open_id.
|
||||
name_lookup_id = open_id if is_bot else (primary_id or union_id)
|
||||
display_name = await self._resolve_sender_name_from_api(
|
||||
name_lookup_id, is_bot=is_bot,
|
||||
)
|
||||
return {
|
||||
"user_id": primary_id,
|
||||
"user_name": display_name,
|
||||
@@ -3484,11 +3572,14 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
self._sender_name_cache.pop(sender_id, None)
|
||||
return None
|
||||
|
||||
async def _resolve_sender_name_from_api(self, sender_id: Optional[str]) -> Optional[str]:
|
||||
"""Fetch the sender's display name from the Feishu contact API with a 10-minute cache.
|
||||
|
||||
ID-type detection mirrors openclaw: ou_ → open_id, on_ → union_id, else user_id.
|
||||
Failures are silently suppressed; the message pipeline must not block on name resolution.
|
||||
async def _resolve_sender_name_from_api(
|
||||
self,
|
||||
sender_id: Optional[str],
|
||||
*,
|
||||
is_bot: bool = False,
|
||||
) -> Optional[str]:
|
||||
"""Bots divert to bot/basic_batch — contact API doesn't return bot names.
|
||||
Failures are silent so the pipeline never blocks on name resolution.
|
||||
"""
|
||||
if not sender_id or not self._client:
|
||||
return None
|
||||
@@ -3498,7 +3589,16 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
now = time.time()
|
||||
cached_name = self._get_cached_sender_name(trimmed)
|
||||
if cached_name is not None:
|
||||
return cached_name
|
||||
return cached_name or None # "" cached means "known nameless"
|
||||
if is_bot:
|
||||
names = await self._fetch_bot_names([trimmed])
|
||||
if names is None:
|
||||
return None
|
||||
expire_at = now + _FEISHU_SENDER_NAME_TTL_SECONDS
|
||||
for oid, name in names.items():
|
||||
self._sender_name_cache[oid] = (name, expire_at)
|
||||
hit = self._sender_name_cache.get(trimmed)
|
||||
return (hit[0] or None) if hit else None
|
||||
try:
|
||||
from lark_oapi.api.contact.v3 import GetUserRequest # lazy import
|
||||
if trimmed.startswith("ou_"):
|
||||
@@ -3527,6 +3627,35 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
logger.debug("[Feishu] Failed to resolve sender name for %s", sender_id, exc_info=True)
|
||||
return None
|
||||
|
||||
async def _fetch_bot_names(self, bot_ids: List[str]) -> Optional[Dict[str, str]]:
|
||||
if not self._client or not bot_ids:
|
||||
return None
|
||||
try:
|
||||
req = (
|
||||
BaseRequest.builder()
|
||||
.http_method(HttpMethod.GET)
|
||||
.uri("/open-apis/bot/v3/bots/basic_batch")
|
||||
.queries([("bot_ids", oid) for oid in bot_ids])
|
||||
.token_types({AccessTokenType.TENANT})
|
||||
.build()
|
||||
)
|
||||
resp = await asyncio.to_thread(self._client.request, req)
|
||||
content = getattr(getattr(resp, "raw", None), "content", None)
|
||||
if not content:
|
||||
return None
|
||||
payload = json.loads(content)
|
||||
if payload.get("code") != 0:
|
||||
return None
|
||||
bots = (payload.get("data") or {}).get("bots") or {}
|
||||
return {
|
||||
oid: str(info.get("name") or "").strip()
|
||||
for oid, info in bots.items()
|
||||
if oid
|
||||
}
|
||||
except Exception:
|
||||
logger.debug("[Feishu] Failed to fetch bot names for %s", bot_ids, exc_info=True)
|
||||
return None
|
||||
|
||||
async def _fetch_message_text(self, message_id: str) -> Optional[str]:
|
||||
if not self._client or not message_id:
|
||||
return None
|
||||
@@ -3590,10 +3719,60 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
logger.exception("[Feishu] Background inbound processing failed")
|
||||
|
||||
# =========================================================================
|
||||
# Group policy and mention gating
|
||||
# Inbound admission
|
||||
# =========================================================================
|
||||
|
||||
def _allow_group_message(self, sender_id: Any, chat_id: str = "") -> bool:
|
||||
def _admit(self, sender: Any, message: Any) -> Optional[RejectReason]:
|
||||
sender_ids = _sender_identity(sender)
|
||||
self_ids = frozenset(v for v in (self._bot_open_id, self._bot_user_id) if v)
|
||||
is_bot = _is_bot_sender(sender)
|
||||
is_group = getattr(message, "chat_type", "p2p") != "p2p"
|
||||
chat_id = getattr(message, "chat_id", "") or ""
|
||||
require_mention = is_group and self._require_mention_for(chat_id)
|
||||
|
||||
# Defensive only — Feishu doesn't echo our outbound back as inbound,
|
||||
# and open_id is always populated on both sides.
|
||||
if self_ids and sender_ids & self_ids:
|
||||
return "self_echo"
|
||||
|
||||
if is_bot:
|
||||
mode = self._allow_bots
|
||||
if mode != "mentions" and mode != "all":
|
||||
return "bots_disabled"
|
||||
# Defensive: pre-hydration or malformed payloads.
|
||||
if not self_ids or not sender_ids:
|
||||
return "self_ids_unknown"
|
||||
# Step 4 covers mention enforcement for groups when require_mention
|
||||
# is on; check here only on paths step 4 won't reach.
|
||||
if mode == "mentions" and not require_mention and not self._mentions_self(message):
|
||||
return "bot_not_mentioned"
|
||||
|
||||
if not is_group:
|
||||
return None
|
||||
|
||||
if not self._allow_group_message(
|
||||
getattr(sender, "sender_id", None), chat_id, is_bot=is_bot,
|
||||
):
|
||||
return "group_policy_rejected"
|
||||
if require_mention and not self._mentions_self(message):
|
||||
return "group_policy_rejected"
|
||||
return None
|
||||
|
||||
def _require_mention_for(self, chat_id: str) -> bool:
|
||||
rule = self._group_rules.get(chat_id) if chat_id else None
|
||||
if rule and rule.require_mention is not None:
|
||||
return rule.require_mention
|
||||
return self._require_mention
|
||||
|
||||
# --- Group policy ---------------------------------------------------------
|
||||
|
||||
def _allow_group_message(
|
||||
self,
|
||||
sender_id: Any,
|
||||
chat_id: str = "",
|
||||
*,
|
||||
is_bot: bool = False,
|
||||
) -> bool:
|
||||
"""Per-group policy gate for non-DM traffic."""
|
||||
sender_open_id = getattr(sender_id, "open_id", None)
|
||||
sender_user_id = getattr(sender_id, "user_id", None)
|
||||
@@ -3612,12 +3791,17 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
allowlist = self._allowed_group_users
|
||||
blacklist = set()
|
||||
|
||||
# Channel locks apply to everyone; allowlist/blacklist only gate humans
|
||||
# (bots were already cleared upstream by FEISHU_ALLOW_BOTS).
|
||||
if policy == "disabled":
|
||||
return False
|
||||
if policy == "open":
|
||||
return True
|
||||
if policy == "admin_only":
|
||||
return False
|
||||
if is_bot:
|
||||
return True
|
||||
|
||||
if policy == "allowlist":
|
||||
return bool(sender_ids and (sender_ids & allowlist))
|
||||
if policy == "blacklist":
|
||||
@@ -3625,17 +3809,16 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
|
||||
return bool(sender_ids and (sender_ids & self._allowed_group_users))
|
||||
|
||||
def _should_accept_group_message(self, message: Any, sender_id: Any, chat_id: str = "") -> bool:
|
||||
"""Require an explicit @mention before group messages enter the agent."""
|
||||
if not self._allow_group_message(sender_id, chat_id):
|
||||
return False
|
||||
# @_all is Feishu's @everyone placeholder — always route to the bot.
|
||||
# --- Mention detection ----------------------------------------------------
|
||||
|
||||
def _mentions_self(self, message: Any) -> bool:
|
||||
# @_all is Feishu's @everyone placeholder.
|
||||
raw_content = getattr(message, "content", "") or ""
|
||||
if "@_all" in raw_content:
|
||||
return True
|
||||
mentions = getattr(message, "mentions", None) or []
|
||||
if mentions:
|
||||
return self._message_mentions_bot(mentions)
|
||||
if mentions and self._message_mentions_bot(mentions):
|
||||
return True
|
||||
normalized = normalize_feishu_message(
|
||||
message_type=getattr(message, "message_type", "") or "",
|
||||
raw_content=raw_content,
|
||||
@@ -3644,23 +3827,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return self._post_mentions_bot(normalized.mentions)
|
||||
|
||||
def _is_self_sent_bot_message(self, event: Any) -> bool:
|
||||
"""Return True only for Feishu events emitted by this Hermes bot."""
|
||||
sender = getattr(event, "sender", None)
|
||||
sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower()
|
||||
if sender_type not in {"bot", "app"}:
|
||||
return False
|
||||
|
||||
sender_id = getattr(sender, "sender_id", None)
|
||||
sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip()
|
||||
sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip()
|
||||
|
||||
if self._bot_open_id and sender_open_id == self._bot_open_id:
|
||||
return True
|
||||
if self._bot_user_id and sender_user_id == self._bot_user_id:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _message_mentions_bot(self, mentions: List[Any]) -> bool:
|
||||
# IDs trump names: when both sides have open_id (or both user_id),
|
||||
# match requires equal IDs. Name fallback only when either side
|
||||
@@ -3699,47 +3865,50 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
and self-sent bot event filtering.
|
||||
|
||||
Populates ``_bot_open_id`` and ``_bot_name`` from /open-apis/bot/v3/info
|
||||
(no extra scopes required beyond the tenant access token). Falls back to
|
||||
the application info endpoint for ``_bot_name`` only when the first probe
|
||||
doesn't return it. Each field is hydrated independently — a value already
|
||||
supplied via env vars (FEISHU_BOT_OPEN_ID / FEISHU_BOT_USER_ID /
|
||||
FEISHU_BOT_NAME) is preserved and skips its probe.
|
||||
(no extra scopes required beyond the tenant access token). The probe
|
||||
always runs when a client is available so stale env vars from app/bot
|
||||
migrations do not break group @mention gating. Falls back to the
|
||||
application info endpoint for ``_bot_name`` only when the first probe
|
||||
doesn't return it. If the probe fails, env-provided values are preserved.
|
||||
"""
|
||||
if not self._client:
|
||||
return
|
||||
if self._bot_open_id and self._bot_name:
|
||||
# Everything the self-send filter and precise mention gate need is
|
||||
# already in place; nothing to probe.
|
||||
return
|
||||
|
||||
# Primary probe: /open-apis/bot/v3/info — returns bot_name + open_id, no
|
||||
# extra scopes required. This is the same endpoint the onboarding wizard
|
||||
# uses via probe_bot().
|
||||
if not self._bot_open_id or not self._bot_name:
|
||||
try:
|
||||
req = (
|
||||
BaseRequest.builder()
|
||||
.http_method(HttpMethod.GET)
|
||||
.uri("/open-apis/bot/v3/info")
|
||||
.token_types({AccessTokenType.TENANT})
|
||||
.build()
|
||||
)
|
||||
resp = await asyncio.to_thread(self._client.request, req)
|
||||
content = getattr(getattr(resp, "raw", None), "content", None)
|
||||
if content:
|
||||
payload = json.loads(content)
|
||||
parsed = _parse_bot_response(payload) or {}
|
||||
open_id = (parsed.get("bot_open_id") or "").strip()
|
||||
bot_name = (parsed.get("bot_name") or "").strip()
|
||||
if open_id and not self._bot_open_id:
|
||||
self._bot_open_id = open_id
|
||||
if bot_name and not self._bot_name:
|
||||
self._bot_name = bot_name
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[Feishu] /bot/v3/info probe failed during hydration",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
req = (
|
||||
BaseRequest.builder()
|
||||
.http_method(HttpMethod.GET)
|
||||
.uri("/open-apis/bot/v3/info")
|
||||
.token_types({AccessTokenType.TENANT})
|
||||
.build()
|
||||
)
|
||||
resp = await asyncio.to_thread(self._client.request, req)
|
||||
content = getattr(getattr(resp, "raw", None), "content", None)
|
||||
if content:
|
||||
payload = json.loads(content)
|
||||
parsed = _parse_bot_response(payload) or {}
|
||||
open_id = (parsed.get("bot_open_id") or "").strip()
|
||||
bot_name = (parsed.get("bot_name") or "").strip()
|
||||
if open_id:
|
||||
if self._bot_open_id and self._bot_open_id != open_id:
|
||||
logger.warning(
|
||||
"[Feishu] FEISHU_BOT_OPEN_ID is stale; using /bot/v3/info open_id for group @mention gating."
|
||||
)
|
||||
self._bot_open_id = open_id
|
||||
if bot_name:
|
||||
if self._bot_name and self._bot_name != bot_name:
|
||||
logger.info(
|
||||
"[Feishu] FEISHU_BOT_NAME differs from /bot/v3/info; using hydrated bot name for group @mention gating."
|
||||
)
|
||||
self._bot_name = bot_name
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[Feishu] /bot/v3/info probe failed during hydration",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Fallback probe for _bot_name only: application info endpoint. Needs
|
||||
# admin:app.info:readonly or application:application:self_manage scope,
|
||||
@@ -3784,7 +3953,14 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
if isinstance(seen_data, list):
|
||||
entries: Dict[str, float] = {str(item).strip(): 0.0 for item in seen_data if str(item).strip()}
|
||||
elif isinstance(seen_data, dict):
|
||||
entries = {k: float(v) for k, v in seen_data.items() if isinstance(k, str) and k.strip()}
|
||||
entries = {}
|
||||
for key, value in seen_data.items():
|
||||
if not isinstance(key, str) or not key.strip():
|
||||
continue
|
||||
try:
|
||||
entries[key] = float(value)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
else:
|
||||
return
|
||||
# Filter out TTL-expired entries (entries saved with ts=0.0 are treated as immortal
|
||||
@@ -3804,7 +3980,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
recent = self._seen_message_order[-self._dedup_cache_size:]
|
||||
# Save as {msg_id: timestamp} so TTL filtering works across restarts.
|
||||
payload = {"message_ids": {k: self._seen_message_ids[k] for k in recent if k in self._seen_message_ids}}
|
||||
self._dedup_state_path.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
|
||||
atomic_json_write(self._dedup_state_path, payload, indent=None)
|
||||
except OSError:
|
||||
logger.warning("[Feishu] Failed to persist dedup state to %s", self._dedup_state_path, exc_info=True)
|
||||
|
||||
@@ -3829,6 +4005,12 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
# =========================================================================
|
||||
|
||||
def _build_outbound_payload(self, content: str) -> tuple[str, str]:
|
||||
# Feishu post-type 'md' elements do not render markdown tables; sending
|
||||
# table content as post causes the message to appear blank on the client.
|
||||
# Force plain text for anything that looks like a markdown table.
|
||||
if _MARKDOWN_TABLE_RE.search(content):
|
||||
text_payload = {"text": content}
|
||||
return "text", json.dumps(text_payload, ensure_ascii=False)
|
||||
if _MARKDOWN_HINT_RE.search(content):
|
||||
return "post", _build_markdown_post_payload(content)
|
||||
text_payload = {"text": content}
|
||||
@@ -3907,15 +4089,18 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str],
|
||||
metadata: Optional[Dict[str, Any]],
|
||||
) -> Any:
|
||||
effective_reply_to = reply_to
|
||||
if not effective_reply_to and metadata and metadata.get("thread_id"):
|
||||
effective_reply_to = metadata.get("reply_to_message_id")
|
||||
reply_in_thread = bool((metadata or {}).get("thread_id"))
|
||||
if reply_to:
|
||||
if effective_reply_to:
|
||||
body = self._build_reply_message_body(
|
||||
content=payload,
|
||||
msg_type=msg_type,
|
||||
reply_in_thread=reply_in_thread,
|
||||
uuid_value=str(uuid.uuid4()),
|
||||
)
|
||||
request = self._build_reply_message_request(reply_to, body)
|
||||
request = self._build_reply_message_request(effective_reply_to, body)
|
||||
return await asyncio.to_thread(self._client.im.v1.message.reply, request)
|
||||
|
||||
body = self._build_create_message_body(
|
||||
@@ -3924,7 +4109,15 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
content=payload,
|
||||
uuid_value=str(uuid.uuid4()),
|
||||
)
|
||||
request = self._build_create_message_request("chat_id", body)
|
||||
# Detect whether chat_id is a user open_id (DM) or a chat_id (group).
|
||||
# Feishu API expects receive_id_type="open_id" for user DMs (ou_ prefix)
|
||||
# and receive_id_type="chat_id" for group chats (oc_ prefix, which IS
|
||||
# the chat_id format — see https://open.feishu.cn/document/).
|
||||
if chat_id.startswith("ou_"):
|
||||
receive_id_type = "open_id"
|
||||
else:
|
||||
receive_id_type = "chat_id"
|
||||
request = self._build_create_message_request(receive_id_type, body)
|
||||
return await asyncio.to_thread(self._client.im.v1.message.create, request)
|
||||
|
||||
@staticmethod
|
||||
@@ -4066,6 +4259,15 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
if active_reply_to and not self._response_succeeded(response):
|
||||
code = getattr(response, "code", None)
|
||||
if code in _FEISHU_REPLY_FALLBACK_CODES:
|
||||
if (metadata or {}).get("thread_id"):
|
||||
logger.warning(
|
||||
"[Feishu] Reply to %s failed in thread %s (code %s — message withdrawn/missing); "
|
||||
"skipping top-level fallback to avoid creating a new topic",
|
||||
active_reply_to,
|
||||
(metadata or {}).get("thread_id"),
|
||||
code,
|
||||
)
|
||||
return response
|
||||
logger.warning(
|
||||
"[Feishu] Reply to %s failed (code %s — message withdrawn/missing); "
|
||||
"falling back to new message in chat %s",
|
||||
|
||||
@@ -13,6 +13,8 @@ import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Dict
|
||||
|
||||
from utils import atomic_json_write
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gateway.platforms.base import MessageEvent
|
||||
|
||||
@@ -220,34 +222,37 @@ class ThreadParticipationTracker:
|
||||
def __init__(self, platform_name: str, max_tracked: int = 500):
|
||||
self._platform = platform_name
|
||||
self._max_tracked = max_tracked
|
||||
self._threads: set = self._load()
|
||||
self._threads: dict[str, None] = {
|
||||
str(thread_id): None for thread_id in self._load()
|
||||
}
|
||||
|
||||
def _state_path(self) -> Path:
|
||||
from hermes_constants import get_hermes_home
|
||||
return get_hermes_home() / f"{self._platform}_threads.json"
|
||||
|
||||
def _load(self) -> set:
|
||||
def _load(self) -> list[str]:
|
||||
path = self._state_path()
|
||||
if path.exists():
|
||||
try:
|
||||
return set(json.loads(path.read_text(encoding="utf-8")))
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
if isinstance(data, list):
|
||||
return [str(thread_id) for thread_id in data]
|
||||
except Exception:
|
||||
pass
|
||||
return set()
|
||||
return []
|
||||
|
||||
def _save(self) -> None:
|
||||
path = self._state_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
thread_list = list(self._threads)
|
||||
if len(thread_list) > self._max_tracked:
|
||||
thread_list = thread_list[-self._max_tracked:]
|
||||
self._threads = set(thread_list)
|
||||
path.write_text(json.dumps(thread_list), encoding="utf-8")
|
||||
self._threads = {thread_id: None for thread_id in thread_list}
|
||||
atomic_json_write(path, thread_list, indent=None)
|
||||
|
||||
def mark(self, thread_id: str) -> None:
|
||||
"""Mark *thread_id* as participated and persist."""
|
||||
if thread_id not in self._threads:
|
||||
self._threads.add(thread_id)
|
||||
self._threads[thread_id] = None
|
||||
self._save()
|
||||
|
||||
def __contains__(self, thread_id: str) -> bool:
|
||||
|
||||
@@ -139,7 +139,7 @@ class HomeAssistantAdapter(BasePlatformAdapter):
|
||||
|
||||
async def _ws_connect(self) -> bool:
|
||||
"""Establish WebSocket connection and authenticate."""
|
||||
ws_url = self._hass_url.replace("http://", "ws://").replace("https://", "wss://")
|
||||
ws_url = self._hass_url.replace("https://", "wss://").replace("http://", "ws://")
|
||||
ws_url = f"{ws_url}/api/websocket"
|
||||
|
||||
self._session = aiohttp.ClientSession(
|
||||
|
||||
@@ -243,10 +243,14 @@ class QQAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
try:
|
||||
# Tighter keepalive pool so idle CLOSE_WAIT sockets drain
|
||||
# faster behind proxies like Cloudflare Warp (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
limits=platform_httpx_limits(),
|
||||
)
|
||||
|
||||
# 1. Get access token
|
||||
@@ -393,13 +397,24 @@ class QQAdapter(BasePlatformAdapter):
|
||||
await self._session.close()
|
||||
self._session = None
|
||||
|
||||
self._session = aiohttp.ClientSession()
|
||||
# Honor WSL proxy env for QQ WebSocket. Hermes upgrades overwrite this
|
||||
# local patch, so QQ can regress to direct-connect timeouts after update.
|
||||
self._session = aiohttp.ClientSession(trust_env=True)
|
||||
ws_proxy = (
|
||||
os.getenv("WSS_PROXY")
|
||||
or os.getenv("wss_proxy")
|
||||
or os.getenv("HTTPS_PROXY")
|
||||
or os.getenv("https_proxy")
|
||||
or os.getenv("ALL_PROXY")
|
||||
or os.getenv("all_proxy")
|
||||
)
|
||||
self._ws = await self._session.ws_connect(
|
||||
gateway_url,
|
||||
headers={
|
||||
"User-Agent": build_user_agent(),
|
||||
},
|
||||
timeout=CONNECT_TIMEOUT_SECONDS,
|
||||
proxy=ws_proxy,
|
||||
)
|
||||
logger.info("[%s] WebSocket connected to %s", self._log_tag, gateway_url)
|
||||
|
||||
|
||||
@@ -192,6 +192,15 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
group_allowed_str = os.getenv("SIGNAL_GROUP_ALLOWED_USERS", "")
|
||||
self.group_allow_from = set(_parse_comma_list(group_allowed_str))
|
||||
|
||||
# DM allowlist — mirrors SIGNAL_ALLOWED_USERS checked by run.py.
|
||||
# Stored here so the reaction hooks can skip unauthorized senders
|
||||
# (reactions fire before run.py's auth gate, so without this check
|
||||
# every inbound DM from any contact gets a 👀 reaction).
|
||||
# "*" means all users allowed (open mode); empty means no restriction
|
||||
# recorded at adapter level (run.py still enforces auth separately).
|
||||
dm_allowed_str = os.getenv("SIGNAL_ALLOWED_USERS", "*")
|
||||
self.dm_allow_from = set(_parse_comma_list(dm_allowed_str))
|
||||
|
||||
# HTTP client
|
||||
self.client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
@@ -248,7 +257,9 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
|
||||
try:
|
||||
# Health check — verify signal-cli daemon is reachable
|
||||
try:
|
||||
@@ -534,6 +545,18 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
logger.exception("Signal: failed to fetch attachment %s", att_id)
|
||||
|
||||
# Skip envelopes with no meaningful content (no text, no attachments).
|
||||
# Catches profile key updates, empty messages, and other metadata-only
|
||||
# envelopes that still carry a dataMessage wrapper but have nothing
|
||||
# worth processing. See issue: signal-cli logs "Profile key update" +
|
||||
# Hermes receives msg='' triggering a full agent turn for nothing.
|
||||
if (not text or not text.strip()) and not media_urls:
|
||||
logger.debug(
|
||||
"Signal: skipping contentless envelope from %s (%d attachments)",
|
||||
redact_phone(sender), len(media_urls) if media_urls else 0,
|
||||
)
|
||||
return
|
||||
|
||||
# Build session source
|
||||
source = self.build_source(
|
||||
chat_id=chat_id,
|
||||
@@ -1416,8 +1439,28 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
return None
|
||||
return (author, ts)
|
||||
|
||||
def _reactions_enabled(self, event: "MessageEvent" = None) -> bool:
|
||||
"""Check if message reactions are enabled for this event.
|
||||
|
||||
Two gates:
|
||||
1. SIGNAL_REACTIONS env var — set to false/0/no to disable globally.
|
||||
2. DM allowlist — if SIGNAL_ALLOWED_USERS is set, only react to
|
||||
messages from senders in that list. This prevents unauthorized
|
||||
contacts from seeing the 👀 reaction (which fires before run.py's
|
||||
auth gate and would otherwise reveal that a bot is listening).
|
||||
"""
|
||||
if os.getenv("SIGNAL_REACTIONS", "true").lower() in ("false", "0", "no"):
|
||||
return False
|
||||
if event is not None:
|
||||
sender = getattr(getattr(event, "source", None), "user_id", None)
|
||||
if sender and "*" not in self.dm_allow_from and sender not in self.dm_allow_from:
|
||||
return False
|
||||
return True
|
||||
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
|
||||
"""React with 👀 when processing begins."""
|
||||
if not self._reactions_enabled(event):
|
||||
return
|
||||
target = self._extract_reaction_target(event)
|
||||
if target:
|
||||
await self.send_reaction(event.source.chat_id, "👀", *target)
|
||||
@@ -1428,6 +1471,8 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
On CANCELLED we leave the 👀 in place — no terminal outcome means
|
||||
the reaction should keep reflecting "in progress" (matches Telegram).
|
||||
"""
|
||||
if not self._reactions_enabled(event):
|
||||
return
|
||||
if outcome == ProcessingOutcome.CANCELLED:
|
||||
return
|
||||
target = self._extract_reaction_target(event)
|
||||
|
||||
+236
-13
@@ -9,6 +9,7 @@ Uses slack-bolt (Python) with Socket Mode for:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -21,6 +22,7 @@ try:
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
||||
from slack_sdk.web.async_client import AsyncWebClient
|
||||
import aiohttp
|
||||
SLACK_AVAILABLE = True
|
||||
except ImportError:
|
||||
SLACK_AVAILABLE = False
|
||||
@@ -50,6 +52,16 @@ from gateway.platforms.base import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ContextVar carrying the user_id of the slash-command invoker.
|
||||
# Set in _handle_slash_command, read in send() to match the correct
|
||||
# stashed response_url when multiple users issue commands on the same
|
||||
# channel concurrently. ContextVars propagate to child asyncio.Tasks
|
||||
# (Python 3.7+), so the value set in _handle_slash_command's task is
|
||||
# visible in _process_message_background's child task.
|
||||
_slash_user_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
|
||||
"_slash_user_id", default=None,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _ThreadContextCache:
|
||||
@@ -310,6 +322,11 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# Track active assistant thread status indicators so stop_typing can
|
||||
# clear them (chat_id → thread_ts).
|
||||
self._active_status_threads: Dict[str, str] = {}
|
||||
# Slash-command contexts: stash response_url + user_id so send()
|
||||
# can route the first reply ephemerally. Keyed by
|
||||
# (channel_id, user_id) to avoid cross-user collisions.
|
||||
# Each value: {"response_url": str, "ts": float}
|
||||
self._slash_command_contexts: Dict[Tuple[str, str], Dict[str, Any]] = {}
|
||||
|
||||
def _describe_slack_api_error(self, response: Any, *, file_obj: Optional[Dict[str, Any]] = None) -> Optional[str]:
|
||||
"""Convert Slack API auth/permission failures into actionable user-facing text."""
|
||||
@@ -368,6 +385,103 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Slash-command ephemeral helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
_SLASH_CTX_TTL = 120.0 # seconds — response_url is valid for 30 min;
|
||||
# we use a much shorter TTL to avoid routing unrelated messages
|
||||
# as ephemeral if the command handler was slow or dropped.
|
||||
|
||||
def _pop_slash_context(
|
||||
self, chat_id: str,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Return and remove the slash-command context for *chat_id*, if fresh.
|
||||
|
||||
Contexts older than ``_SLASH_CTX_TTL`` seconds are silently discarded.
|
||||
|
||||
Uses the ``_slash_user_id`` ContextVar (set in ``_handle_slash_command``)
|
||||
to match the exact ``(channel_id, user_id)`` key. This prevents a
|
||||
concurrent slash command from a different user on the same channel from
|
||||
stealing another user's ephemeral context. Falls back to a
|
||||
channel-only scan when the ContextVar is unset (e.g. send() called
|
||||
from a non-slash code path — should not match anything).
|
||||
"""
|
||||
now = time.monotonic()
|
||||
# Clean up stale entries on every lookup — dict is small.
|
||||
stale_keys = [
|
||||
k for k, v in self._slash_command_contexts.items()
|
||||
if now - v["ts"] > self._SLASH_CTX_TTL
|
||||
]
|
||||
for k in stale_keys:
|
||||
self._slash_command_contexts.pop(k, None)
|
||||
|
||||
# Precise match: (channel_id, user_id) from ContextVar.
|
||||
uid = _slash_user_id.get()
|
||||
if uid:
|
||||
return self._slash_command_contexts.pop((chat_id, uid), None)
|
||||
|
||||
# Fallback: channel-only scan (only reachable when ContextVar is
|
||||
# unset, i.e. send() called outside a slash-command async context).
|
||||
match_key = None
|
||||
for key in list(self._slash_command_contexts):
|
||||
if key[0] == chat_id:
|
||||
match_key = key
|
||||
break
|
||||
if match_key is None:
|
||||
return None
|
||||
return self._slash_command_contexts.pop(match_key)
|
||||
|
||||
async def _send_slash_ephemeral(
|
||||
self,
|
||||
ctx: Dict[str, Any],
|
||||
content: str,
|
||||
) -> "SendResult":
|
||||
"""Replace the initial ephemeral ack via ``response_url``.
|
||||
|
||||
Slack's ``response_url`` accepts a POST with ``replace_original``
|
||||
for up to 30 minutes after the slash command was invoked. This
|
||||
lets us swap the "Running /cmd…" placeholder with the real reply,
|
||||
and the message stays ephemeral ("Only visible to you").
|
||||
|
||||
Falls back to a simple ``True`` SendResult if the POST fails —
|
||||
the user already saw the initial ack, so a delivery failure here
|
||||
is non-critical.
|
||||
"""
|
||||
formatted = self.format_message(content)
|
||||
# Slack's response_url has the same ~40k char limit as chat_postMessage.
|
||||
# Truncate to MAX_MESSAGE_LENGTH and use only the first chunk — the
|
||||
# response_url replaces a single ephemeral ack, so multi-chunk isn't
|
||||
# possible. Long responses are rare for command replies.
|
||||
chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
|
||||
text = chunks[0] if chunks else formatted
|
||||
payload = {
|
||||
"response_type": "ephemeral",
|
||||
"replace_original": True,
|
||||
"text": text,
|
||||
}
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
ctx["response_url"],
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=10),
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
return SendResult(success=True, message_id=None)
|
||||
body = await resp.text()
|
||||
logger.warning(
|
||||
"[Slack] response_url POST returned %s: %s",
|
||||
resp.status,
|
||||
body[:200],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[Slack] response_url POST failed: %s", e,
|
||||
)
|
||||
# Non-fatal — the user saw the initial ack already.
|
||||
return SendResult(success=True, message_id=None)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
if not SLACK_AVAILABLE:
|
||||
@@ -414,6 +528,21 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
lock_acquired = True
|
||||
|
||||
# Close any previous handler before creating a new one so that
|
||||
# calling connect() a second time (e.g. during a gateway restart or
|
||||
# in-process reconnect attempt) does not leave a zombie Socket Mode
|
||||
# connection alive. Both the old and new connections would otherwise
|
||||
# receive every Slack event and dispatch it twice, producing double
|
||||
# responses — the same bug that affected DiscordAdapter (#18187).
|
||||
if self._handler is not None:
|
||||
try:
|
||||
await self._handler.close_async()
|
||||
except Exception:
|
||||
logger.debug("[%s] Failed to close previous Slack handler", self.name)
|
||||
finally:
|
||||
self._handler = None
|
||||
self._app = None
|
||||
|
||||
# First token is the primary — used for AsyncApp / Socket Mode
|
||||
primary_token = bot_tokens[0]
|
||||
self._app = AsyncApp(token=primary_token)
|
||||
@@ -446,12 +575,16 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
async def handle_message_event(event, say):
|
||||
await self._handle_slack_message(event)
|
||||
|
||||
# Acknowledge app_mention events to prevent Bolt 404 errors.
|
||||
# The "message" handler above already processes @mentions in
|
||||
# channels, so this is intentionally a no-op to avoid duplicates.
|
||||
# Handle app_mention explicitly. In some Slack app configurations,
|
||||
# channel mentions arrive only as app_mention events rather than the
|
||||
# generic message event. Forward them into the normal message
|
||||
# pipeline so @mentions reliably produce replies.
|
||||
# NOTE: when Slack fires BOTH message and app_mention for the same
|
||||
# @mention, they share the same event ts — the dedup in
|
||||
# _handle_slack_message (MessageDeduplicator) suppresses the second.
|
||||
@self._app.event("app_mention")
|
||||
async def handle_app_mention(event, say):
|
||||
pass
|
||||
await self._handle_slack_message(event)
|
||||
|
||||
# File lifecycle events can arrive around snippet uploads even when
|
||||
# the actual user message is what we care about. Ack them so Slack
|
||||
@@ -502,7 +635,11 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
@self._app.command(_slash_pattern)
|
||||
async def handle_hermes_command(ack, command):
|
||||
await ack()
|
||||
slash = (command.get("command") or "").lstrip("/")
|
||||
await ack(
|
||||
response_type="ephemeral",
|
||||
text=f"Running `/{slash}`…",
|
||||
)
|
||||
await self._handle_slash_command(command)
|
||||
|
||||
# Register Block Kit action handlers for approval buttons
|
||||
@@ -574,6 +711,17 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
# Check for a pending slash-command context. When the user ran a
|
||||
# native slash command (e.g. /q, /stop, /model), the initial ack
|
||||
# already showed an ephemeral "Running /cmd…" message. If we have
|
||||
# a stashed response_url for this channel, replace that ack with
|
||||
# the actual command reply ephemerally instead of posting publicly.
|
||||
slash_ctx = self._pop_slash_context(chat_id)
|
||||
if slash_ctx:
|
||||
return await self._send_slash_ephemeral(
|
||||
slash_ctx, content,
|
||||
)
|
||||
|
||||
# Convert standard markdown → Slack mrkdwn
|
||||
formatted = self.format_message(content)
|
||||
|
||||
@@ -601,6 +749,10 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
last_result = await self._get_client(chat_id).chat_postMessage(**kwargs)
|
||||
|
||||
# Clear Slack Assistant status as soon as the final message is posted.
|
||||
if thread_ts:
|
||||
await self.stop_typing(chat_id)
|
||||
|
||||
# Track the sent message ts so we can auto-respond to thread
|
||||
# replies without requiring @mention.
|
||||
sent_ts = last_result.get("ts") if last_result else None
|
||||
@@ -624,6 +776,42 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
logger.error("[Slack] Send error: %s", e, exc_info=True)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_private_notice(
|
||||
self,
|
||||
chat_id: str,
|
||||
user_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a Slack ephemeral message visible only to one user."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
if not chat_id or not user_id:
|
||||
return SendResult(success=False, error="chat_id and user_id are required")
|
||||
|
||||
try:
|
||||
formatted = self.format_message(content)
|
||||
thread_ts = self._resolve_thread_ts(reply_to, metadata)
|
||||
kwargs = {
|
||||
"channel": chat_id,
|
||||
"user": user_id,
|
||||
"text": formatted,
|
||||
"mrkdwn": True,
|
||||
}
|
||||
if thread_ts:
|
||||
kwargs["thread_ts"] = thread_ts
|
||||
|
||||
result = await self._get_client(chat_id).chat_postEphemeral(**kwargs)
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=result.get("message_ts") or result.get("ts"),
|
||||
raw_response=result,
|
||||
)
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.error("[Slack] Ephemeral send error: %s", e, exc_info=True)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def edit_message(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -642,6 +830,8 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
ts=message_id,
|
||||
text=formatted,
|
||||
)
|
||||
if finalize:
|
||||
await self.stop_typing(chat_id)
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.error(
|
||||
@@ -682,7 +872,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# in an assistant-enabled context. Falls back to reactions.
|
||||
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
|
||||
async def stop_typing(self, chat_id: str, metadata=None) -> None:
|
||||
"""Clear the assistant thread status indicator."""
|
||||
if not self._app:
|
||||
return
|
||||
@@ -969,7 +1159,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
return _ph(f'<{url}|{label}>')
|
||||
|
||||
text = re.sub(
|
||||
r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)',
|
||||
r'(?<!!)\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)',
|
||||
_convert_markdown_link,
|
||||
text,
|
||||
)
|
||||
@@ -1016,9 +1206,11 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
# 10) Convert italic: _text_ stays as _text_ (already Slack italic)
|
||||
# Single *text* → _text_ (Slack italic)
|
||||
# Single *text* → _text_ (Slack italic), but only when the
|
||||
# emphasized text touches non-whitespace on both sides so literal
|
||||
# delimiters like "a * b * c" are preserved.
|
||||
text = re.sub(
|
||||
r'(?<!\*)\*([^*\n]+)\*(?!\*)',
|
||||
r'(?<!\*)\*(\S(?:[^*\n]*?\S)?)\*(?!\*)',
|
||||
lambda m: _ph(f'_{m.group(1)}_'),
|
||||
text,
|
||||
)
|
||||
@@ -2524,9 +2716,14 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# gateway command dispatcher by prepending the slash.
|
||||
text = f"/{slash_name} {text}".strip()
|
||||
|
||||
# Slack slash commands can originate from DMs or shared channels.
|
||||
# Preserve DM semantics only for DM channel IDs; shared channels must
|
||||
# keep group semantics so different users do not collide into one
|
||||
# session key.
|
||||
is_dm = str(channel_id).startswith("D")
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_type="dm", # Slash commands are always in DM-like context
|
||||
chat_type="dm" if is_dm else "group",
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
@@ -2537,7 +2734,26 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
raw_message=command,
|
||||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
# Stash the Slack response_url so the first reply for this
|
||||
# channel+user can be routed ephemerally (replaces the initial
|
||||
# "Running /cmd…" ack shown by handle_hermes_command).
|
||||
# Only stash for COMMAND events (text starts with "/") — free-form
|
||||
# questions via "/hermes <question>" must produce public replies so
|
||||
# the whole channel can see the agent's answer.
|
||||
response_url = command.get("response_url", "")
|
||||
if response_url and user_id and channel_id and text.startswith("/"):
|
||||
self._slash_command_contexts[(channel_id, user_id)] = {
|
||||
"response_url": response_url,
|
||||
"ts": time.monotonic(),
|
||||
}
|
||||
|
||||
# Set the ContextVar so send() can match the correct stashed
|
||||
# response_url even when multiple users slash concurrently.
|
||||
_slash_user_id_token = _slash_user_id.set(user_id or None)
|
||||
try:
|
||||
await self.handle_message(event)
|
||||
finally:
|
||||
_slash_user_id.reset(_slash_user_id_token)
|
||||
|
||||
def _has_active_session_for_thread(
|
||||
self,
|
||||
@@ -2698,6 +2914,13 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
raw = os.getenv("SLACK_FREE_RESPONSE_CHANNELS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return {part.strip() for part in raw.split(",") if part.strip()}
|
||||
# Coerce non-list scalars (str/int/float) to str before splitting.
|
||||
# A bare numeric YAML value (`free_response_channels: 1234567890`) is
|
||||
# loaded as int and was previously falling through the isinstance(str)
|
||||
# branch to return an empty set. str() here accepts whatever scalar
|
||||
# the YAML loader hands us without changing existing string/CSV
|
||||
# semantics.
|
||||
s = str(raw).strip() if raw is not None else ""
|
||||
if s:
|
||||
return {part.strip() for part in s.split(",") if part.strip()}
|
||||
return set()
|
||||
|
||||
@@ -10,7 +10,7 @@ Shares credentials with the optional telephony skill — same env vars:
|
||||
|
||||
Gateway-specific env vars:
|
||||
- SMS_WEBHOOK_PORT (default 8080)
|
||||
- SMS_WEBHOOK_HOST (default 0.0.0.0)
|
||||
- SMS_WEBHOOK_HOST (default 127.0.0.1)
|
||||
- SMS_WEBHOOK_URL (public URL for Twilio signature validation — required)
|
||||
- SMS_INSECURE_NO_SIGNATURE (true to disable signature validation — dev only)
|
||||
- SMS_ALLOWED_USERS (comma-separated E.164 phone numbers)
|
||||
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
|
||||
TWILIO_API_BASE = "https://api.twilio.com/2010-04-01/Accounts"
|
||||
MAX_SMS_LENGTH = 1600 # ~10 SMS segments
|
||||
DEFAULT_WEBHOOK_PORT = 8080
|
||||
DEFAULT_WEBHOOK_HOST = "0.0.0.0"
|
||||
DEFAULT_WEBHOOK_HOST = "127.0.0.1"
|
||||
|
||||
|
||||
def check_sms_requirements() -> bool:
|
||||
@@ -91,19 +91,23 @@ class SmsAdapter(BasePlatformAdapter):
|
||||
from aiohttp import web
|
||||
|
||||
if not self._from_number:
|
||||
logger.error("[sms] TWILIO_PHONE_NUMBER not set — cannot send replies")
|
||||
msg = "[sms] TWILIO_PHONE_NUMBER not set — cannot send replies"
|
||||
logger.error(msg)
|
||||
self._set_fatal_error("sms_missing_phone_number", msg, retryable=False)
|
||||
return False
|
||||
|
||||
insecure_no_sig = os.getenv("SMS_INSECURE_NO_SIGNATURE", "").lower() == "true"
|
||||
|
||||
if not self._webhook_url and not insecure_no_sig:
|
||||
logger.error(
|
||||
msg = (
|
||||
"[sms] Refusing to start: SMS_WEBHOOK_URL is required for Twilio "
|
||||
"signature validation. Set it to the public URL configured in your "
|
||||
"Twilio console (e.g. https://example.com/webhooks/twilio). "
|
||||
"For local development without validation, set "
|
||||
"SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production).",
|
||||
"SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production)."
|
||||
)
|
||||
logger.error(msg)
|
||||
self._set_fatal_error("sms_missing_webhook_url", msg, retryable=False)
|
||||
return False
|
||||
|
||||
if insecure_no_sig and not self._webhook_url:
|
||||
|
||||
+227
-29
@@ -290,14 +290,53 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
|
||||
self._slash_confirm_state: Dict[str, str] = {}
|
||||
|
||||
@staticmethod
|
||||
def _is_callback_user_authorized(user_id: str) -> bool:
|
||||
def _is_callback_user_authorized(
|
||||
self,
|
||||
user_id: str,
|
||||
*,
|
||||
chat_id: Optional[str] = None,
|
||||
chat_type: Optional[str] = None,
|
||||
thread_id: Optional[str] = None,
|
||||
user_name: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Return whether a Telegram inline-button caller may perform gated actions."""
|
||||
normalized_user_id = str(user_id or "").strip()
|
||||
if not normalized_user_id:
|
||||
return False
|
||||
|
||||
runner = getattr(getattr(self, "_message_handler", None), "__self__", None)
|
||||
auth_fn = getattr(runner, "_is_user_authorized", None)
|
||||
if callable(auth_fn):
|
||||
try:
|
||||
from gateway.session import SessionSource
|
||||
|
||||
normalized_chat_type = str(chat_type or "dm").strip().lower() or "dm"
|
||||
if normalized_chat_type == "private":
|
||||
normalized_chat_type = "dm"
|
||||
elif normalized_chat_type == "supergroup":
|
||||
normalized_chat_type = "forum" if thread_id is not None else "group"
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id=str(chat_id or normalized_user_id),
|
||||
chat_type=normalized_chat_type,
|
||||
user_id=normalized_user_id,
|
||||
user_name=str(user_name).strip() if user_name else None,
|
||||
thread_id=str(thread_id) if thread_id is not None else None,
|
||||
)
|
||||
return bool(auth_fn(source))
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[Telegram] Falling back to env-only callback auth for user %s",
|
||||
normalized_user_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
|
||||
if not allowed_csv:
|
||||
return True
|
||||
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
|
||||
return "*" in allowed_ids or user_id in allowed_ids
|
||||
return "*" in allowed_ids or normalized_user_id in allowed_ids
|
||||
|
||||
@classmethod
|
||||
def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
@@ -314,7 +353,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
@classmethod
|
||||
def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
|
||||
if not thread_id:
|
||||
# Mirrors _message_thread_id_for_send: the General forum topic (thread id
|
||||
# "1") is represented as "no thread id" on the wire. User-created topics
|
||||
# keep their real id so typing stays scoped to that topic.
|
||||
if not thread_id or str(thread_id) == cls._GENERAL_TOPIC_THREAD_ID:
|
||||
return None
|
||||
return int(thread_id)
|
||||
|
||||
@@ -473,6 +515,17 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self.name, attempt,
|
||||
)
|
||||
self._polling_network_error_count = 0
|
||||
# start_polling() returning is necessary but not sufficient:
|
||||
# PTB's Updater can be left in a state where `running` is True
|
||||
# but the underlying long-poll task is wedged on a stale httpx
|
||||
# connection and never makes progress. No error_callback fires
|
||||
# in that state, so the reconnect ladder won't advance on its
|
||||
# own. Schedule a deferred probe to detect the wedge and
|
||||
# re-enter the ladder if needed.
|
||||
if not self.has_fatal_error:
|
||||
probe = asyncio.ensure_future(self._verify_polling_after_reconnect())
|
||||
self._background_tasks.add(probe)
|
||||
probe.add_done_callback(self._background_tasks.discard)
|
||||
except Exception as retry_err:
|
||||
logger.warning("[%s] Telegram polling reconnect failed: %s", self.name, retry_err)
|
||||
# start_polling failed — polling is dead and no further error
|
||||
@@ -484,6 +537,50 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._background_tasks.add(task)
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
|
||||
async def _verify_polling_after_reconnect(self) -> None:
|
||||
"""Heartbeat probe scheduled after a successful reconnect.
|
||||
|
||||
PTB's Updater can survive a botched stop()+start_polling() cycle
|
||||
with `running=True` but a wedged consumer task. No error callback
|
||||
fires, so the reconnect ladder doesn't advance on its own. This
|
||||
probe detects the wedge by:
|
||||
|
||||
1. Sleeping HEARTBEAT_PROBE_DELAY so a healthy long-poll has time
|
||||
to complete at least one cycle.
|
||||
2. Verifying `Updater.running` is still True.
|
||||
3. Probing the bot endpoint with a tight asyncio timeout. A
|
||||
wedged httpx pool fails this probe; a healthy one returns
|
||||
well under the timeout.
|
||||
|
||||
On any failure, re-enter the reconnect ladder so the existing
|
||||
MAX_NETWORK_RETRIES path can ultimately escalate to fatal-error.
|
||||
"""
|
||||
HEARTBEAT_PROBE_DELAY = 60
|
||||
PROBE_TIMEOUT = 10
|
||||
|
||||
await asyncio.sleep(HEARTBEAT_PROBE_DELAY)
|
||||
|
||||
if self.has_fatal_error:
|
||||
return
|
||||
if not (self._app and self._app.updater and self._app.updater.running):
|
||||
logger.warning(
|
||||
"[%s] Updater not running %ds after reconnect — treating as wedged",
|
||||
self.name, HEARTBEAT_PROBE_DELAY,
|
||||
)
|
||||
await self._handle_polling_network_error(
|
||||
RuntimeError("Updater not running after reconnect heartbeat")
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(self._app.bot.get_me(), PROBE_TIMEOUT)
|
||||
except Exception as probe_err:
|
||||
logger.warning(
|
||||
"[%s] Polling heartbeat probe failed %ds after reconnect: %s",
|
||||
self.name, HEARTBEAT_PROBE_DELAY, probe_err,
|
||||
)
|
||||
await self._handle_polling_network_error(probe_err)
|
||||
|
||||
async def _handle_polling_conflict(self, error: Exception) -> None:
|
||||
if self.has_fatal_error and self.fatal_error_code == "telegram_polling_conflict":
|
||||
return
|
||||
@@ -594,6 +691,29 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return None
|
||||
|
||||
async def rename_dm_topic(
|
||||
self,
|
||||
chat_id: int,
|
||||
thread_id: int,
|
||||
name: str,
|
||||
) -> None:
|
||||
"""Rename a forum topic in a private (DM) chat."""
|
||||
if not self._bot:
|
||||
return
|
||||
try:
|
||||
chat_id_arg = int(chat_id)
|
||||
except (TypeError, ValueError):
|
||||
chat_id_arg = chat_id
|
||||
await self._bot.edit_forum_topic(
|
||||
chat_id=chat_id_arg,
|
||||
message_thread_id=int(thread_id),
|
||||
name=name,
|
||||
)
|
||||
logger.info(
|
||||
"[%s] Renamed DM topic in chat %s thread_id=%s -> '%s'",
|
||||
self.name, chat_id, thread_id, name,
|
||||
)
|
||||
|
||||
def _persist_dm_topic_thread_id(self, chat_id: int, topic_name: str, thread_id: int) -> None:
|
||||
"""Save a newly created thread_id back into config.yaml so it persists across restarts."""
|
||||
try:
|
||||
@@ -722,6 +842,20 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# Persist thread_id to config so we don't recreate on next restart
|
||||
self._persist_dm_topic_thread_id(int(chat_id), topic_name, thread_id)
|
||||
|
||||
# Send a seed message so the topic is visible in Telegram's client.
|
||||
# Empty topics are hidden by the client UI until they contain a message.
|
||||
try:
|
||||
await self._bot.send_message(
|
||||
chat_id=int(chat_id),
|
||||
message_thread_id=thread_id,
|
||||
text=f"\U0001f4cc {topic_name}",
|
||||
)
|
||||
except Exception as seed_err:
|
||||
logger.debug(
|
||||
"[%s] Could not send seed message to topic '%s': %s",
|
||||
self.name, topic_name, seed_err,
|
||||
)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Telegram via polling or webhook.
|
||||
|
||||
@@ -1321,6 +1455,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
async def send_update_prompt(
|
||||
self, chat_id: str, prompt: str, default: str = "",
|
||||
session_key: str = "",
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an inline-keyboard update prompt (Yes / No buttons).
|
||||
|
||||
@@ -1338,11 +1473,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
InlineKeyboardButton("✗ No", callback_data="update_prompt:n"),
|
||||
]
|
||||
])
|
||||
thread_id = self._metadata_thread_id(metadata)
|
||||
message_thread_id = self._message_thread_id_for_send(thread_id)
|
||||
msg = await self._bot.send_message(
|
||||
chat_id=int(chat_id),
|
||||
text=text,
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=keyboard,
|
||||
message_thread_id=message_thread_id,
|
||||
**self._link_preview_kwargs(),
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
@@ -1760,6 +1898,12 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if not query or not query.data:
|
||||
return
|
||||
data = query.data
|
||||
query_message = getattr(query, "message", None)
|
||||
query_chat_id = getattr(query_message, "chat_id", None)
|
||||
query_chat = getattr(query_message, "chat", None)
|
||||
query_chat_type = getattr(query_chat, "type", None)
|
||||
query_thread_id = getattr(query_message, "message_thread_id", None)
|
||||
query_user_name = getattr(query.from_user, "first_name", None)
|
||||
|
||||
# --- Model picker callbacks ---
|
||||
if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
|
||||
@@ -1781,7 +1925,13 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
# Only authorized users may click approval buttons.
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(caller_id):
|
||||
if not self._is_callback_user_authorized(
|
||||
caller_id,
|
||||
chat_id=query_chat_id,
|
||||
chat_type=str(query_chat_type) if query_chat_type is not None else None,
|
||||
thread_id=str(query_thread_id) if query_thread_id is not None else None,
|
||||
user_name=query_user_name,
|
||||
):
|
||||
await query.answer(text="⛔ You are not authorized to approve commands.")
|
||||
return
|
||||
|
||||
@@ -1831,8 +1981,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
choice = parts[1] # once, always, cancel
|
||||
confirm_id = parts[2]
|
||||
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(caller_id):
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(
|
||||
caller_id,
|
||||
chat_id=query_chat_id,
|
||||
chat_type=str(query_chat_type) if query_chat_type is not None else None,
|
||||
thread_id=str(query_thread_id) if query_thread_id is not None else None,
|
||||
user_name=query_user_name,
|
||||
):
|
||||
await query.answer(text="⛔ You are not authorized to answer this prompt.")
|
||||
return
|
||||
|
||||
@@ -1891,7 +2047,13 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return
|
||||
answer = data.split(":", 1)[1] # "y" or "n"
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(caller_id):
|
||||
if not self._is_callback_user_authorized(
|
||||
caller_id,
|
||||
chat_id=query_chat_id,
|
||||
chat_type=str(query_chat_type) if query_chat_type is not None else None,
|
||||
thread_id=str(query_thread_id) if query_thread_id is not None else None,
|
||||
user_name=query_user_name,
|
||||
):
|
||||
await query.answer(text="⛔ You are not authorized to answer update prompts.")
|
||||
return
|
||||
await query.answer(text=f"Sent '{answer}' to the update process.")
|
||||
@@ -2131,13 +2293,54 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[%s] Failed to send Telegram local image, falling back to base adapter: %s",
|
||||
self.name,
|
||||
e,
|
||||
exc_info=True,
|
||||
error_str = str(e)
|
||||
# Dimension-related errors are the expected case for valid image
|
||||
# files that Telegram just refuses as photos (screenshots, extreme
|
||||
# aspect ratios). Log at INFO because the document fallback is
|
||||
# the correct path. Any other send_photo failure also falls back
|
||||
# to document (rate limits, corrupt file markers, format edge
|
||||
# cases), but at WARNING because it's unexpected and worth
|
||||
# surfacing in logs.
|
||||
is_dim_error = (
|
||||
"Photo_invalid_dimensions" in error_str
|
||||
or "PHOTO_INVALID_DIMENSIONS" in error_str
|
||||
)
|
||||
return await super().send_image_file(chat_id, image_path, caption, reply_to)
|
||||
if is_dim_error:
|
||||
logger.info(
|
||||
"[%s] Image dimensions exceed Telegram photo limits, "
|
||||
"sending as document: %s",
|
||||
self.name,
|
||||
image_path,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"[%s] Failed to send Telegram local image as photo, "
|
||||
"trying document fallback: %s",
|
||||
self.name,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
# Fallback to sending as document (file) — no dimension limit,
|
||||
# only 50MB size limit. If even that fails, fall back to the
|
||||
# base adapter's text-only "Image: /path" rendering.
|
||||
try:
|
||||
return await self.send_document(
|
||||
chat_id=chat_id,
|
||||
file_path=image_path,
|
||||
caption=caption,
|
||||
file_name=os.path.basename(image_path),
|
||||
reply_to=reply_to,
|
||||
metadata=metadata,
|
||||
)
|
||||
except Exception as doc_err:
|
||||
logger.error(
|
||||
"[%s] Failed to send Telegram local image as document, "
|
||||
"falling back to base adapter: %s",
|
||||
self.name,
|
||||
doc_err,
|
||||
exc_info=True,
|
||||
)
|
||||
return await super().send_image_file(chat_id, image_path, caption, reply_to)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
@@ -2308,21 +2511,16 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
_typing_thread = self._metadata_thread_id(metadata)
|
||||
message_thread_id = self._message_thread_id_for_typing(_typing_thread)
|
||||
try:
|
||||
await self._bot.send_chat_action(
|
||||
chat_id=int(chat_id),
|
||||
action="typing",
|
||||
message_thread_id=message_thread_id,
|
||||
)
|
||||
except Exception as e:
|
||||
if message_thread_id is not None and self._is_thread_not_found_error(e):
|
||||
await self._bot.send_chat_action(
|
||||
chat_id=int(chat_id),
|
||||
action="typing",
|
||||
message_thread_id=None,
|
||||
)
|
||||
else:
|
||||
raise
|
||||
# No retry-without-thread fallback here: _message_thread_id_for_typing
|
||||
# already maps the forum General topic to None, so any non-None value
|
||||
# reaching this call is a user-created topic. If Telegram rejects it
|
||||
# (e.g. topic deleted mid-session), we swallow the failure rather than
|
||||
# showing a typing indicator in the wrong chat/All Messages.
|
||||
await self._bot.send_chat_action(
|
||||
chat_id=int(chat_id),
|
||||
action="typing",
|
||||
message_thread_id=message_thread_id,
|
||||
)
|
||||
except Exception as e:
|
||||
# Typing failures are non-fatal; log at debug level only.
|
||||
logger.debug(
|
||||
|
||||
@@ -185,10 +185,13 @@ async def _query_doh_provider(
|
||||
async def discover_fallback_ips() -> list[str]:
|
||||
"""Auto-discover Telegram API IPs via DNS-over-HTTPS.
|
||||
|
||||
Resolves api.telegram.org through Google and Cloudflare DoH, collects all
|
||||
unique IPs, and excludes the system-DNS-resolved IP (which is presumably
|
||||
unreachable on this network). Falls back to a hardcoded seed list when DoH
|
||||
is also unavailable.
|
||||
Resolves api.telegram.org through Google and Cloudflare DoH and returns all
|
||||
unique A records. IPs that match the local system resolver are kept rather
|
||||
than excluded: in many networks the system-DNS IP is the most reliable path
|
||||
to api.telegram.org and a transient primary-path failure should be retried
|
||||
against the same address via the IP-rewrite path before the seed list is
|
||||
consulted (#14520). Falls back to a hardcoded seed list only when DoH
|
||||
yields no usable answers.
|
||||
"""
|
||||
async with httpx.AsyncClient(timeout=httpx.Timeout(_DOH_TIMEOUT)) as client:
|
||||
doh_tasks = [_query_doh_provider(client, p) for p in _DOH_PROVIDERS]
|
||||
@@ -203,11 +206,11 @@ async def discover_fallback_ips() -> list[str]:
|
||||
if isinstance(r, list):
|
||||
doh_ips.extend(r)
|
||||
|
||||
# Deduplicate preserving order, exclude system-DNS IPs
|
||||
# Deduplicate preserving order
|
||||
seen: set[str] = set()
|
||||
candidates: list[str] = []
|
||||
for ip in doh_ips:
|
||||
if ip not in seen and ip not in system_ips:
|
||||
if ip not in seen:
|
||||
seen.add(ip)
|
||||
candidates.append(ip)
|
||||
|
||||
@@ -219,7 +222,7 @@ async def discover_fallback_ips() -> list[str]:
|
||||
return validated
|
||||
|
||||
logger.info(
|
||||
"DoH discovery yielded no new IPs (system DNS: %s); using seed fallback IPs %s",
|
||||
"DoH discovery yielded no usable IPs (system DNS: %s); using seed fallback IPs %s",
|
||||
", ".join(system_ips) or "unknown",
|
||||
", ".join(_SEED_FALLBACK_IPS),
|
||||
)
|
||||
|
||||
@@ -142,6 +142,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
"""WeCom AI Bot adapter backed by a persistent WebSocket connection."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
# Threshold for detecting WeCom client-side message splits.
|
||||
# When a chunk is near the 4000-char limit, a continuation is almost certain.
|
||||
_SPLIT_THRESHOLD = 3900
|
||||
@@ -206,7 +207,11 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
try:
|
||||
self._http_client = httpx.AsyncClient(timeout=30.0, follow_redirects=True)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(
|
||||
timeout=30.0, follow_redirects=True, limits=platform_httpx_limits(),
|
||||
)
|
||||
await self._open_connection()
|
||||
self._mark_connected()
|
||||
self._listen_task = asyncio.create_task(self._listen_loop())
|
||||
@@ -1010,6 +1015,8 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
if not aes_key:
|
||||
raise ValueError("aes_key is required")
|
||||
|
||||
# WeCom doesn't pad base64 keys; add padding if needed
|
||||
aes_key = aes_key + '=' * ((4 - len(aes_key) % 4) % 4)
|
||||
key = base64.b64decode(aes_key)
|
||||
if len(key) != 32:
|
||||
raise ValueError(f"Invalid WeCom AES key length: expected 32 bytes, got {len(key)}")
|
||||
|
||||
@@ -119,7 +119,9 @@ class WecomCallbackAdapter(BasePlatformAdapter):
|
||||
pass
|
||||
|
||||
try:
|
||||
self._http_client = httpx.AsyncClient(timeout=20.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(timeout=20.0, limits=platform_httpx_limits())
|
||||
self._app = web.Application()
|
||||
self._app.router.add_get("/health", self._handle_health)
|
||||
self._app.router.add_get(self._path, self._handle_verify)
|
||||
|
||||
@@ -1333,6 +1333,15 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
if message_id and self._dedup.is_duplicate(message_id):
|
||||
return
|
||||
|
||||
# Secondary content-fingerprint dedup for text messages
|
||||
item_list = message.get("item_list") or []
|
||||
text = _extract_text(item_list)
|
||||
if text:
|
||||
content_key = f"content:{sender_id}:{hashlib.md5(text.encode()).hexdigest()}"
|
||||
if self._dedup.is_duplicate(content_key):
|
||||
logger.debug("[%s] Content-dedup: skipping duplicate message from %s", self.name, sender_id)
|
||||
return
|
||||
|
||||
chat_type, effective_chat_id = _guess_chat_type(message, self._account_id)
|
||||
if chat_type == "group":
|
||||
if self._group_policy == "disabled":
|
||||
@@ -1347,8 +1356,6 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
self._token_store.set(self._account_id, sender_id, context_token)
|
||||
asyncio.create_task(self._maybe_fetch_typing_ticket(sender_id, context_token or None))
|
||||
|
||||
item_list = message.get("item_list") or []
|
||||
text = _extract_text(item_list)
|
||||
media_paths: List[str] = []
|
||||
media_types: List[str] = []
|
||||
|
||||
@@ -2030,7 +2037,9 @@ async def send_weixin_direct(
|
||||
|
||||
live_adapter = _LIVE_ADAPTERS.get(resolved_token)
|
||||
send_session = getattr(live_adapter, '_send_session', None)
|
||||
if live_adapter is not None and send_session is not None and not send_session.closed:
|
||||
if (live_adapter is not None and send_session is not None
|
||||
and not send_session.closed
|
||||
and send_session._loop is asyncio.get_running_loop()):
|
||||
last_result: Optional[SendResult] = None
|
||||
cleaned = live_adapter.format_message(message)
|
||||
if cleaned:
|
||||
|
||||
@@ -185,6 +185,13 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
self._bridge_log: Optional[Path] = None
|
||||
self._poll_task: Optional[asyncio.Task] = None
|
||||
self._http_session: Optional["aiohttp.ClientSession"] = None
|
||||
# Set to True by disconnect() before we SIGTERM our child bridge so
|
||||
# _check_managed_bridge_exit() can distinguish an intentional
|
||||
# shutdown-time exit (returncode -15 / -2 / 0) from a real crash.
|
||||
# Without this, every graceful gateway shutdown/restart would log
|
||||
# "Fatal whatsapp adapter error" plus dispatch a fatal-error
|
||||
# notification before the normal "✓ whatsapp disconnected" fires.
|
||||
self._shutting_down: bool = False
|
||||
|
||||
def _whatsapp_require_mention(self) -> bool:
|
||||
configured = self.config.extra.get("require_mention")
|
||||
@@ -555,6 +562,21 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
if returncode is None:
|
||||
return None
|
||||
|
||||
# Planned shutdown: disconnect() sets _shutting_down before it sends
|
||||
# SIGTERM to the bridge, so a returncode of -15 (SIGTERM), -2 (SIGINT),
|
||||
# or 0 (clean exit) at that point is expected, not a crash. Treat it
|
||||
# as informational and skip the fatal-error path.
|
||||
# getattr-with-default keeps tests that construct the adapter via
|
||||
# ``WhatsAppAdapter.__new__`` (bypassing __init__) working without
|
||||
# every _make_adapter() helper having to seed the attribute.
|
||||
if getattr(self, "_shutting_down", False) and returncode in (0, -2, -15):
|
||||
logger.info(
|
||||
"[%s] Bridge exited during shutdown (code %d).",
|
||||
self.name,
|
||||
returncode,
|
||||
)
|
||||
return None
|
||||
|
||||
message = f"WhatsApp bridge process exited unexpectedly (code {returncode})."
|
||||
if not self.has_fatal_error:
|
||||
logger.error("[%s] %s", self.name, message)
|
||||
@@ -565,6 +587,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
|
||||
# Flip the shutdown flag BEFORE signalling the child so the exit-check
|
||||
# path (which runs from other tasks like send() and the poll loop)
|
||||
# doesn't race us and report the intentional termination as fatal.
|
||||
self._shutting_down = True
|
||||
if self._bridge_process:
|
||||
try:
|
||||
try:
|
||||
@@ -876,11 +902,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
import aiohttp
|
||||
|
||||
await self._http_session.post(
|
||||
# Must wrap in `async with` — a bare `await session.post(...)`
|
||||
# leaves the response object alive until GC, holding its TCP
|
||||
# socket in CLOSE_WAIT. See #18451.
|
||||
async with self._http_session.post(
|
||||
f"http://127.0.0.1:{self._bridge_port}/typing",
|
||||
json={"chatId": chat_id},
|
||||
timeout=aiohttp.ClientTimeout(total=5)
|
||||
)
|
||||
):
|
||||
pass
|
||||
except Exception:
|
||||
pass # Ignore typing indicator failures
|
||||
|
||||
|
||||
@@ -1896,10 +1896,12 @@ class OwnerCommandMiddleware(InboundMiddleware):
|
||||
if cmd not in cls.ALLOWLIST:
|
||||
return None, None, False
|
||||
|
||||
# Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
|
||||
# owner_id = (push or {}).get("bot_owner_id") or ""
|
||||
# is_owner = bool(owner_id) and owner_id == from_account
|
||||
is_owner = True
|
||||
# Sender identity check: bot owner <-> push.from_account == push.bot_owner_id.
|
||||
# The allowlisted commands (/approve, /deny, /stop, /reset, ...) are
|
||||
# privileged — leaking them to non-owners lets any group member approve
|
||||
# a dangerous tool call, kill the owner's task, or wipe session state.
|
||||
owner_id = str((push or {}).get("bot_owner_id") or "").strip()
|
||||
is_owner = bool(owner_id) and owner_id == from_account
|
||||
return cmd, cmd_line, is_owner
|
||||
|
||||
async def handle(self, ctx: InboundContext, next_fn) -> None:
|
||||
|
||||
+2387
-186
File diff suppressed because it is too large
Load Diff
+34
-16
@@ -458,6 +458,15 @@ class SessionEntry:
|
||||
was_auto_reset: bool = False
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by reset_session() when the user explicitly sends /new or /reset.
|
||||
# Consumed once by _handle_message_with_agent to trigger topic/channel
|
||||
# skill re-injection on the first message of the new session. We can't
|
||||
# reuse was_auto_reset for this because that flag fires the "session
|
||||
# expired due to inactivity" user-facing notice and a misleading
|
||||
# context-note prepend — both wrong for an explicit manual reset.
|
||||
# See issue #6508.
|
||||
is_fresh_reset: bool = False
|
||||
|
||||
# Set by the background expiry watcher after it finalizes an expired
|
||||
# session (invoking on_session_finalize hooks and evicting the cached
|
||||
@@ -508,6 +517,7 @@ class SessionEntry:
|
||||
if self.last_resume_marked_at
|
||||
else None
|
||||
),
|
||||
"is_fresh_reset": self.is_fresh_reset,
|
||||
}
|
||||
if self.origin:
|
||||
result["origin"] = self.origin.to_dict()
|
||||
@@ -556,6 +566,7 @@ class SessionEntry:
|
||||
resume_pending=data.get("resume_pending", False),
|
||||
resume_reason=data.get("resume_reason"),
|
||||
last_resume_marked_at=last_resume_marked_at,
|
||||
is_fresh_reset=data.get("is_fresh_reset", False),
|
||||
)
|
||||
|
||||
|
||||
@@ -1075,19 +1086,22 @@ class SessionStore:
|
||||
return len(removed_keys)
|
||||
|
||||
def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
|
||||
"""Mark recently-active sessions as suspended.
|
||||
"""Mark recently-active sessions as resumable after an unexpected exit.
|
||||
|
||||
Called on gateway startup to prevent sessions that were likely
|
||||
in-flight when the gateway last exited from being blindly resumed
|
||||
(#7536). Only suspends sessions updated within *max_age_seconds*
|
||||
to avoid resetting long-idle sessions that are harmless to resume.
|
||||
Returns the number of sessions that were suspended.
|
||||
Called on gateway startup after a crash or fast restart to preserve
|
||||
in-flight sessions instead of destroying their conversation history
|
||||
(#7536). Only marks sessions updated within *max_age_seconds* to
|
||||
avoid touching long-idle sessions. Sets ``resume_pending=True`` so
|
||||
the next incoming message on the same session_key auto-resumes from
|
||||
the existing transcript.
|
||||
|
||||
Entries flagged ``resume_pending=True`` are skipped — those were
|
||||
marked intentionally by the drain-timeout path as recoverable.
|
||||
Terminal escalation for genuinely stuck ``resume_pending`` sessions
|
||||
is handled by the existing ``.restart_failure_counts`` stuck-loop
|
||||
counter, which runs after this method on startup.
|
||||
Entries already flagged ``resume_pending=True`` are skipped. Entries
|
||||
explicitly ``suspended=True`` (from /stop or stuck-loop escalation)
|
||||
are also skipped. Terminal escalation for genuinely stuck sessions
|
||||
is still handled by the existing ``.restart_failure_counts`` counter
|
||||
(threshold 3), which runs after this method and sets ``suspended=True``.
|
||||
|
||||
Returns the number of sessions marked resumable.
|
||||
"""
|
||||
from datetime import timedelta
|
||||
|
||||
@@ -1099,13 +1113,15 @@ class SessionStore:
|
||||
if entry.resume_pending:
|
||||
continue
|
||||
if not entry.suspended and entry.updated_at >= cutoff:
|
||||
entry.suspended = True
|
||||
entry.resume_pending = True
|
||||
entry.resume_reason = "restart_interrupted"
|
||||
entry.last_resume_marked_at = _now()
|
||||
count += 1
|
||||
if count:
|
||||
self._save()
|
||||
return count
|
||||
|
||||
def reset_session(self, session_key: str) -> Optional[SessionEntry]:
|
||||
def reset_session(self, session_key: str, display_name: Optional[str] = None) -> Optional[SessionEntry]:
|
||||
"""Force reset a session, creating a new session ID."""
|
||||
db_end_session_id = None
|
||||
db_create_kwargs = None
|
||||
@@ -1129,9 +1145,10 @@ class SessionStore:
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
origin=old_entry.origin,
|
||||
display_name=old_entry.display_name,
|
||||
display_name=display_name if display_name is not None else old_entry.display_name,
|
||||
platform=old_entry.platform,
|
||||
chat_type=old_entry.chat_type,
|
||||
is_fresh_reset=True,
|
||||
)
|
||||
|
||||
self._entries[session_key] = new_entry
|
||||
@@ -1259,8 +1276,9 @@ class SessionStore:
|
||||
|
||||
# Also write legacy JSONL (keeps existing tooling working during transition)
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
with open(transcript_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(message, ensure_ascii=False) + "\n")
|
||||
with self._lock:
|
||||
with open(transcript_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(message, ensure_ascii=False) + "\n")
|
||||
|
||||
def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Replace the entire transcript for a session with new messages.
|
||||
|
||||
+115
-55
@@ -21,6 +21,7 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Optional
|
||||
from utils import atomic_json_write
|
||||
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
@@ -34,6 +35,10 @@ _IS_WINDOWS = sys.platform == "win32"
|
||||
_UNSET = object()
|
||||
_GATEWAY_LOCK_FILENAME = "gateway.lock"
|
||||
_gateway_lock_handle = None
|
||||
# Windows byte-range locks are mandatory for other readers. Lock a byte well
|
||||
# past the JSON payload so runtime status / PID readers can still read the file
|
||||
# while another process holds the mutual-exclusion lock.
|
||||
_WINDOWS_LOCK_OFFSET = 1024 * 1024
|
||||
|
||||
|
||||
def _get_pid_path() -> Path:
|
||||
@@ -205,8 +210,7 @@ def _read_json_file(path: Path) -> Optional[dict[str, Any]]:
|
||||
|
||||
|
||||
def _write_json_file(path: Path, payload: dict[str, Any]) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(payload))
|
||||
atomic_json_write(path, payload, indent=None, separators=(",", ":"))
|
||||
|
||||
|
||||
def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
|
||||
@@ -286,7 +290,7 @@ def _try_acquire_file_lock(handle) -> bool:
|
||||
if handle.tell() == 0:
|
||||
handle.write("\n")
|
||||
handle.flush()
|
||||
handle.seek(0)
|
||||
handle.seek(_WINDOWS_LOCK_OFFSET)
|
||||
msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
|
||||
else:
|
||||
fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
@@ -298,7 +302,7 @@ def _try_acquire_file_lock(handle) -> bool:
|
||||
def _release_file_lock(handle) -> None:
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
handle.seek(0)
|
||||
handle.seek(_WINDOWS_LOCK_OFFSET)
|
||||
msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
|
||||
@@ -633,6 +637,8 @@ def release_all_scoped_locks(
|
||||
|
||||
_TAKEOVER_MARKER_FILENAME = ".gateway-takeover.json"
|
||||
_TAKEOVER_MARKER_TTL_S = 60 # Marker older than this is treated as stale
|
||||
_PLANNED_STOP_MARKER_FILENAME = ".gateway-planned-stop.json"
|
||||
_PLANNED_STOP_MARKER_TTL_S = 60
|
||||
|
||||
|
||||
def _get_takeover_marker_path() -> Path:
|
||||
@@ -641,6 +647,67 @@ def _get_takeover_marker_path() -> Path:
|
||||
return home / _TAKEOVER_MARKER_FILENAME
|
||||
|
||||
|
||||
def _get_planned_stop_marker_path() -> Path:
|
||||
"""Return the path to the intentional gateway stop marker file."""
|
||||
home = get_hermes_home()
|
||||
return home / _PLANNED_STOP_MARKER_FILENAME
|
||||
|
||||
|
||||
def _marker_is_stale(written_at: str, ttl_s: int) -> bool:
|
||||
try:
|
||||
written_dt = datetime.fromisoformat(written_at)
|
||||
age = (datetime.now(timezone.utc) - written_dt).total_seconds()
|
||||
return age > ttl_s
|
||||
except (TypeError, ValueError):
|
||||
return True
|
||||
|
||||
|
||||
def _consume_pid_marker_for_self(
|
||||
path: Path,
|
||||
*,
|
||||
pid_field: str,
|
||||
start_time_field: str,
|
||||
ttl_s: int,
|
||||
) -> bool:
|
||||
record = _read_json_file(path)
|
||||
if not record:
|
||||
return False
|
||||
|
||||
try:
|
||||
target_pid = int(record[pid_field])
|
||||
target_start_time = record.get(start_time_field)
|
||||
written_at = record.get("written_at") or ""
|
||||
except (KeyError, TypeError, ValueError):
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
return False
|
||||
|
||||
if _marker_is_stale(written_at, ttl_s):
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
return False
|
||||
|
||||
our_pid = os.getpid()
|
||||
our_start_time = _get_process_start_time(our_pid)
|
||||
matches = (
|
||||
target_pid == our_pid
|
||||
and target_start_time is not None
|
||||
and our_start_time is not None
|
||||
and target_start_time == our_start_time
|
||||
)
|
||||
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return matches
|
||||
|
||||
|
||||
def write_takeover_marker(target_pid: int) -> bool:
|
||||
"""Record that ``target_pid`` is being replaced by the current process.
|
||||
|
||||
@@ -677,59 +744,13 @@ def consume_takeover_marker_for_self() -> bool:
|
||||
Always unlinks the marker on match (and on detected staleness) so
|
||||
subsequent unrelated signals don't re-trigger.
|
||||
"""
|
||||
path = _get_takeover_marker_path()
|
||||
record = _read_json_file(path)
|
||||
if not record:
|
||||
return False
|
||||
|
||||
# Any malformed or stale marker → drop it and return False
|
||||
try:
|
||||
target_pid = int(record["target_pid"])
|
||||
target_start_time = record.get("target_start_time")
|
||||
written_at = record.get("written_at") or ""
|
||||
except (KeyError, TypeError, ValueError):
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
return False
|
||||
|
||||
# TTL guard: a stale marker older than _TAKEOVER_MARKER_TTL_S is ignored.
|
||||
stale = False
|
||||
try:
|
||||
written_dt = datetime.fromisoformat(written_at)
|
||||
age = (datetime.now(timezone.utc) - written_dt).total_seconds()
|
||||
if age > _TAKEOVER_MARKER_TTL_S:
|
||||
stale = True
|
||||
except (TypeError, ValueError):
|
||||
stale = True # Unparseable timestamp — treat as stale
|
||||
|
||||
if stale:
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
return False
|
||||
|
||||
# Does the marker name THIS process?
|
||||
our_pid = os.getpid()
|
||||
our_start_time = _get_process_start_time(our_pid)
|
||||
matches = (
|
||||
target_pid == our_pid
|
||||
and target_start_time is not None
|
||||
and our_start_time is not None
|
||||
and target_start_time == our_start_time
|
||||
return _consume_pid_marker_for_self(
|
||||
_get_takeover_marker_path(),
|
||||
pid_field="target_pid",
|
||||
start_time_field="target_start_time",
|
||||
ttl_s=_TAKEOVER_MARKER_TTL_S,
|
||||
)
|
||||
|
||||
# Consume the marker whether it matched or not — a marker that doesn't
|
||||
# match our identity is stale-for-us anyway.
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return matches
|
||||
|
||||
|
||||
def clear_takeover_marker() -> None:
|
||||
"""Remove the takeover marker unconditionally. Safe to call repeatedly."""
|
||||
@@ -739,6 +760,45 @@ def clear_takeover_marker() -> None:
|
||||
pass
|
||||
|
||||
|
||||
def write_planned_stop_marker(target_pid: int) -> bool:
|
||||
"""Record that ``target_pid`` is being stopped intentionally.
|
||||
|
||||
The gateway exits non-zero for unexpected SIGTERM so service managers can
|
||||
revive it. Service stop commands send the same SIGTERM, so the CLI writes
|
||||
this short-lived marker first to let the target process exit cleanly.
|
||||
"""
|
||||
try:
|
||||
target_start_time = _get_process_start_time(target_pid)
|
||||
record = {
|
||||
"target_pid": target_pid,
|
||||
"target_start_time": target_start_time,
|
||||
"stopper_pid": os.getpid(),
|
||||
"written_at": _utc_now_iso(),
|
||||
}
|
||||
_write_json_file(_get_planned_stop_marker_path(), record)
|
||||
return True
|
||||
except (OSError, PermissionError):
|
||||
return False
|
||||
|
||||
|
||||
def consume_planned_stop_marker_for_self() -> bool:
|
||||
"""Return True when the current process is being intentionally stopped."""
|
||||
return _consume_pid_marker_for_self(
|
||||
_get_planned_stop_marker_path(),
|
||||
pid_field="target_pid",
|
||||
start_time_field="target_start_time",
|
||||
ttl_s=_PLANNED_STOP_MARKER_TTL_S,
|
||||
)
|
||||
|
||||
|
||||
def clear_planned_stop_marker() -> None:
|
||||
"""Remove the planned-stop marker unconditionally."""
|
||||
try:
|
||||
_get_planned_stop_marker_path().unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def get_running_pid(
|
||||
pid_path: Optional[Path] = None,
|
||||
*,
|
||||
|
||||
+35
-3
@@ -5,11 +5,43 @@ Provides subcommands for:
|
||||
- hermes chat - Interactive chat (same as ./hermes)
|
||||
- hermes gateway - Run gateway in foreground
|
||||
- hermes gateway start - Start gateway service
|
||||
- hermes gateway stop - Stop gateway service
|
||||
- hermes gateway stop - Stop gateway service
|
||||
- hermes setup - Interactive setup wizard
|
||||
- hermes status - Show status of all components
|
||||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "0.11.0"
|
||||
__release_date__ = "2026.4.23"
|
||||
import os
|
||||
import sys
|
||||
|
||||
__version__ = "0.12.0"
|
||||
__release_date__ = "2026.4.30"
|
||||
|
||||
|
||||
def _ensure_utf8():
|
||||
"""Force UTF-8 stdout/stderr on Windows to prevent UnicodeEncodeError.
|
||||
|
||||
Windows services and terminals default to cp1252, which cannot encode
|
||||
box-drawing characters used in CLI output. This causes unhandled
|
||||
UnicodeEncodeError crashes on gateway startup.
|
||||
"""
|
||||
if sys.platform != "win32":
|
||||
return
|
||||
os.environ.setdefault("PYTHONUTF8", "1")
|
||||
os.environ.setdefault("PYTHONIOENCODING", "utf-8")
|
||||
for stream_name in ("stdout", "stderr"):
|
||||
stream = getattr(sys, stream_name, None)
|
||||
if stream is None:
|
||||
continue
|
||||
try:
|
||||
if getattr(stream, "encoding", "").lower().replace("-", "") != "utf8":
|
||||
new_stream = open(
|
||||
stream.fileno(), "w", encoding="utf-8",
|
||||
buffering=1, closefd=False,
|
||||
)
|
||||
setattr(sys, stream_name, new_stream)
|
||||
except (AttributeError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
_ensure_utf8()
|
||||
|
||||
+437
-24
@@ -43,7 +43,7 @@ import yaml
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import atomic_replace
|
||||
from utils import atomic_replace, atomic_yaml_write, is_truthy_value
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -416,6 +416,40 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
),
|
||||
}
|
||||
|
||||
# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in
|
||||
# providers/ that is not already declared above. New providers only need a
|
||||
# plugins/model-providers/<name>/ plugin — no edits to this file required.
|
||||
try:
|
||||
from providers import list_providers as _list_providers_for_registry
|
||||
for _pp in _list_providers_for_registry():
|
||||
if _pp.name in PROVIDER_REGISTRY:
|
||||
continue
|
||||
if _pp.auth_type != "api_key" or not _pp.env_vars:
|
||||
continue
|
||||
# Skip providers that need custom token resolution or are special-cased
|
||||
# in resolve_provider() (copilot/kimi/zai have bespoke token refresh;
|
||||
# openrouter/custom are aggregator/user-supplied and handled outside
|
||||
# the registry — adding them here breaks runtime_provider resolution
|
||||
# that relies on `openrouter not in PROVIDER_REGISTRY`).
|
||||
if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai", "openrouter", "custom"}:
|
||||
continue
|
||||
_api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_BASE_URL") and not v.endswith("_URL"))
|
||||
_base_url_var = next((v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), None)
|
||||
PROVIDER_REGISTRY[_pp.name] = ProviderConfig(
|
||||
id=_pp.name,
|
||||
name=_pp.display_name or _pp.name,
|
||||
auth_type="api_key",
|
||||
inference_base_url=_pp.base_url,
|
||||
api_key_env_vars=_api_key_vars or _pp.env_vars,
|
||||
base_url_env_var=_base_url_var or "",
|
||||
)
|
||||
# Also register aliases so resolve_provider() resolves them
|
||||
for _alias in _pp.aliases:
|
||||
if _alias not in PROVIDER_REGISTRY:
|
||||
PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Anthropic Key Helper
|
||||
@@ -746,6 +780,73 @@ def _auth_file_path() -> Path:
|
||||
return path
|
||||
|
||||
|
||||
def _global_auth_file_path() -> Optional[Path]:
|
||||
"""Return the global-root auth.json when the process is in profile mode.
|
||||
|
||||
Returns ``None`` when the profile and global root resolve to the same
|
||||
directory (classic mode, or custom HERMES_HOME that is not a profile).
|
||||
Used by read-only fallback paths so providers authed at the root are
|
||||
visible to profile processes that haven't configured them locally.
|
||||
|
||||
See issue #18594 follow-up (credential_pool shadowing).
|
||||
"""
|
||||
try:
|
||||
from hermes_constants import get_default_hermes_root
|
||||
global_root = get_default_hermes_root()
|
||||
except Exception:
|
||||
return None
|
||||
profile_home = get_hermes_home()
|
||||
try:
|
||||
if profile_home.resolve(strict=False) == global_root.resolve(strict=False):
|
||||
return None
|
||||
except Exception:
|
||||
if profile_home == global_root:
|
||||
return None
|
||||
# No pytest seat belt here: this is a pure read-only path, and
|
||||
# ``_load_global_auth_store()`` wraps the read in a try/except so an
|
||||
# unreadable global file can never break the profile process. The
|
||||
# write-side seat belt still lives on ``_auth_file_path()`` where it
|
||||
# belongs (that's what protects the real user's auth store from being
|
||||
# corrupted by a mis-configured test).
|
||||
return global_root / "auth.json"
|
||||
|
||||
|
||||
def _load_global_auth_store() -> Dict[str, Any]:
|
||||
"""Load the global-root auth store (read-only fallback).
|
||||
|
||||
Returns an empty dict when no global fallback exists (classic mode,
|
||||
or the global auth.json is absent). Never raises on missing file.
|
||||
|
||||
Seat belt: under pytest, refuses to read the real user's
|
||||
``~/.hermes/auth.json`` even when HERMES_HOME is set to a profile
|
||||
path. The hermetic conftest does not redirect ``HOME``, so
|
||||
``get_default_hermes_root()`` for a profile-shaped HERMES_HOME can
|
||||
still resolve to the real user's home on a dev machine. That would
|
||||
leak real credentials into tests. This guard uses the unmodified
|
||||
``HOME`` env var (what ``os.path.expanduser('~')`` would resolve to),
|
||||
not ``Path.home()``, because ``Path.home`` is sometimes monkeypatched
|
||||
by fixtures that want to relocate the global root to a tmp path.
|
||||
"""
|
||||
global_path = _global_auth_file_path()
|
||||
if global_path is None or not global_path.exists():
|
||||
return {}
|
||||
if os.environ.get("PYTEST_CURRENT_TEST"):
|
||||
real_home_env = os.environ.get("HOME", "")
|
||||
if real_home_env:
|
||||
real_root = Path(real_home_env) / ".hermes" / "auth.json"
|
||||
try:
|
||||
if global_path.resolve(strict=False) == real_root.resolve(strict=False):
|
||||
return {}
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
return _load_auth_store(global_path)
|
||||
except Exception:
|
||||
# A malformed global store must not break profile reads. The
|
||||
# profile's own auth store is still authoritative.
|
||||
return {}
|
||||
|
||||
|
||||
def _auth_lock_path() -> Path:
|
||||
return _auth_file_path().with_suffix(".lock")
|
||||
|
||||
@@ -932,15 +1033,50 @@ def get_auth_provider_display_name(provider_id: str) -> str:
|
||||
|
||||
|
||||
def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Return the persisted credential pool, or one provider slice."""
|
||||
"""Return the persisted credential pool, or one provider slice.
|
||||
|
||||
In profile mode, the profile's credential pool is authoritative. If a
|
||||
provider has no entries in the profile, entries from the global-root
|
||||
``auth.json`` are used as a read-only fallback — so workers spawned in a
|
||||
profile can see providers that were only authenticated at global scope.
|
||||
|
||||
Profile entries always win: the global fallback only applies per-provider
|
||||
when the profile has zero entries for that provider. Once the user runs
|
||||
``hermes auth add <provider>`` inside the profile, profile entries
|
||||
fully shadow global for that provider on the next read.
|
||||
|
||||
Writes always go to the profile (``write_credential_pool`` is unchanged).
|
||||
See issue #18594 follow-up.
|
||||
"""
|
||||
auth_store = _load_auth_store()
|
||||
pool = auth_store.get("credential_pool")
|
||||
if not isinstance(pool, dict):
|
||||
pool = {}
|
||||
|
||||
global_pool: Dict[str, Any] = {}
|
||||
global_store = _load_global_auth_store()
|
||||
maybe_global_pool = global_store.get("credential_pool") if global_store else None
|
||||
if isinstance(maybe_global_pool, dict):
|
||||
global_pool = maybe_global_pool
|
||||
|
||||
if provider_id is None:
|
||||
return dict(pool)
|
||||
merged = dict(pool)
|
||||
for gp_key, gp_entries in global_pool.items():
|
||||
if not isinstance(gp_entries, list) or not gp_entries:
|
||||
continue
|
||||
# Per-provider shadowing: profile wins whenever it has ANY entries.
|
||||
existing = merged.get(gp_key)
|
||||
if isinstance(existing, list) and existing:
|
||||
continue
|
||||
merged[gp_key] = list(gp_entries)
|
||||
return merged
|
||||
|
||||
provider_entries = pool.get(provider_id)
|
||||
return list(provider_entries) if isinstance(provider_entries, list) else []
|
||||
if isinstance(provider_entries, list) and provider_entries:
|
||||
return list(provider_entries)
|
||||
# Profile has no entries for this provider — fall back to global.
|
||||
global_entries = global_pool.get(provider_id)
|
||||
return list(global_entries) if isinstance(global_entries, list) else []
|
||||
|
||||
|
||||
def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
|
||||
@@ -999,9 +1135,25 @@ def unsuppress_credential_source(provider_id: str, source: str) -> bool:
|
||||
|
||||
|
||||
def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Return persisted auth state for a provider, or None."""
|
||||
"""Return persisted auth state for a provider, or None.
|
||||
|
||||
In profile mode, falls back to the global-root ``auth.json`` when the
|
||||
profile has no state for this provider. Profile state always wins when
|
||||
present. Writes (``_save_auth_store`` / ``persist_*_credentials``) are
|
||||
unchanged — they still target the profile only. This mirrors
|
||||
``read_credential_pool``'s per-provider shadowing semantics so that
|
||||
``_seed_from_singletons`` can reseed a profile's credential pool from
|
||||
global-scope provider state (e.g. a globally-authenticated Anthropic
|
||||
OAuth or Nous device-code session). See issue #18594 follow-up.
|
||||
"""
|
||||
auth_store = _load_auth_store()
|
||||
return _load_provider_state(auth_store, provider_id)
|
||||
state = _load_provider_state(auth_store, provider_id)
|
||||
if state is not None:
|
||||
return state
|
||||
global_store = _load_global_auth_store()
|
||||
if not global_store:
|
||||
return None
|
||||
return _load_provider_state(global_store, provider_id)
|
||||
|
||||
|
||||
def get_active_provider() -> Optional[str]:
|
||||
@@ -1195,6 +1347,17 @@ def resolve_provider(
|
||||
"vllm": "custom", "llamacpp": "custom",
|
||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||
}
|
||||
# Extend with aliases declared in plugins/model-providers/<name>/ that aren't already mapped.
|
||||
# This keeps providers/ as the single source for new aliases while the
|
||||
# hardcoded dict above remains authoritative for existing ones.
|
||||
try:
|
||||
from providers import list_providers as _lp
|
||||
for _pp in _lp():
|
||||
for _alias in _pp.aliases:
|
||||
if _alias not in _PROVIDER_ALIASES:
|
||||
_PROVIDER_ALIASES[_alias] = _pp.name
|
||||
except Exception:
|
||||
pass
|
||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
if normalized == "openrouter":
|
||||
@@ -2480,8 +2643,8 @@ def _resolve_verify(
|
||||
tls_state = tls_state if isinstance(tls_state, dict) else {}
|
||||
|
||||
effective_insecure = (
|
||||
bool(insecure) if insecure is not None
|
||||
else bool(tls_state.get("insecure", False))
|
||||
is_truthy_value(insecure, default=False) if insecure is not None
|
||||
else is_truthy_value(tls_state.get("insecure", False), default=False)
|
||||
)
|
||||
effective_ca = (
|
||||
ca_bundle
|
||||
@@ -2589,6 +2752,208 @@ def _poll_for_token(
|
||||
# Nous Portal — token refresh, agent key minting, model discovery
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Shared Nous token store — lets OAuth credentials persist across profiles
|
||||
# so a new `hermes --profile <name> auth add nous --type oauth` can one-tap
|
||||
# import instead of running the full device-code flow every time.
|
||||
#
|
||||
# File lives at ${HERMES_SHARED_AUTH_DIR}/nous_auth.json, defaulting to
|
||||
# ~/.hermes/shared/nous_auth.json. It is OUTSIDE any named profile's
|
||||
# HERMES_HOME so named profiles (which typically live under
|
||||
# ~/.hermes/profiles/<name>/) all see the same file.
|
||||
#
|
||||
# Written on successful login and on every runtime refresh so the stored
|
||||
# refresh_token stays current even if one profile refreshes and rotates it.
|
||||
# If ever the stored refresh_token does go stale server-side, import fails
|
||||
# gracefully and the user falls back to the normal device-code flow.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
NOUS_SHARED_STORE_FILENAME = "nous_auth.json"
|
||||
|
||||
|
||||
def _nous_shared_auth_dir() -> Path:
|
||||
"""Resolve the directory that holds the shared Nous token store.
|
||||
|
||||
Honors ``HERMES_SHARED_AUTH_DIR`` so tests can redirect it to a tmp
|
||||
path without touching the real user's home. Defaults to
|
||||
``~/.hermes/shared/``.
|
||||
"""
|
||||
override = os.getenv("HERMES_SHARED_AUTH_DIR", "").strip()
|
||||
if override:
|
||||
return Path(override).expanduser()
|
||||
return Path.home() / ".hermes" / "shared"
|
||||
|
||||
|
||||
def _nous_shared_store_path() -> Path:
|
||||
path = _nous_shared_auth_dir() / NOUS_SHARED_STORE_FILENAME
|
||||
# Seat belt: if pytest is running and this resolves to a path under the
|
||||
# real user's home, refuse rather than silently corrupt cross-profile
|
||||
# state. Tests must set HERMES_SHARED_AUTH_DIR to a tmp_path (conftest
|
||||
# does not do this automatically — mirror the _auth_file_path() guard
|
||||
# so forgetting to set it fails loudly instead of writing to the real
|
||||
# shared store).
|
||||
if os.environ.get("PYTEST_CURRENT_TEST"):
|
||||
real_home_shared = (
|
||||
Path.home() / ".hermes" / "shared" / NOUS_SHARED_STORE_FILENAME
|
||||
).resolve(strict=False)
|
||||
try:
|
||||
resolved = path.resolve(strict=False)
|
||||
except Exception:
|
||||
resolved = path
|
||||
if resolved == real_home_shared:
|
||||
raise RuntimeError(
|
||||
f"Refusing to touch real user shared Nous auth store during test run: "
|
||||
f"{path}. Set HERMES_SHARED_AUTH_DIR to a tmp_path in your test fixture."
|
||||
)
|
||||
return path
|
||||
|
||||
|
||||
def _write_shared_nous_state(state: Dict[str, Any]) -> None:
|
||||
"""Persist a minimal copy of the Nous OAuth state to the shared store.
|
||||
|
||||
Best-effort: any failure is swallowed after logging. The shared store
|
||||
is a convenience layer; the per-profile auth.json remains the source
|
||||
of truth.
|
||||
|
||||
We deliberately omit the short-lived ``agent_key`` (24h TTL, profile-
|
||||
specific) — only the long-lived OAuth tokens are cross-profile useful.
|
||||
"""
|
||||
refresh_token = state.get("refresh_token")
|
||||
access_token = state.get("access_token")
|
||||
if not (isinstance(refresh_token, str) and refresh_token.strip()):
|
||||
# No refresh_token = nothing worth sharing across profiles
|
||||
return
|
||||
if not (isinstance(access_token, str) and access_token.strip()):
|
||||
return
|
||||
|
||||
shared = {
|
||||
"_schema": 1,
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
"token_type": state.get("token_type") or "Bearer",
|
||||
"scope": state.get("scope") or DEFAULT_NOUS_SCOPE,
|
||||
"client_id": state.get("client_id") or DEFAULT_NOUS_CLIENT_ID,
|
||||
"portal_base_url": state.get("portal_base_url") or DEFAULT_NOUS_PORTAL_URL,
|
||||
"inference_base_url": state.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL,
|
||||
"obtained_at": state.get("obtained_at"),
|
||||
"expires_at": state.get("expires_at"),
|
||||
"updated_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
try:
|
||||
path = _nous_shared_store_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = path.with_suffix(path.suffix + ".tmp")
|
||||
tmp.write_text(json.dumps(shared, indent=2, sort_keys=True))
|
||||
try:
|
||||
os.chmod(tmp, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
os.replace(tmp, path)
|
||||
_oauth_trace(
|
||||
"nous_shared_store_written",
|
||||
path=str(path),
|
||||
refresh_token_fp=_token_fingerprint(refresh_token),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to write shared Nous auth store: %s", exc)
|
||||
|
||||
|
||||
def _read_shared_nous_state() -> Optional[Dict[str, Any]]:
|
||||
"""Return the shared Nous OAuth state if present and well-formed.
|
||||
|
||||
Returns ``None`` when the file is missing, unreadable, malformed, or
|
||||
lacks required fields. Callers should treat ``None`` as "no shared
|
||||
credentials available — fall through to device-code".
|
||||
"""
|
||||
try:
|
||||
path = _nous_shared_store_path()
|
||||
except RuntimeError:
|
||||
# Test seat belt tripped — treat as missing
|
||||
return None
|
||||
if not path.is_file():
|
||||
return None
|
||||
try:
|
||||
payload = json.loads(path.read_text())
|
||||
except (OSError, ValueError) as exc:
|
||||
logger.debug("Shared Nous auth store at %s is unreadable: %s", path, exc)
|
||||
return None
|
||||
if not isinstance(payload, dict):
|
||||
return None
|
||||
refresh_token = payload.get("refresh_token")
|
||||
access_token = payload.get("access_token")
|
||||
if not (isinstance(refresh_token, str) and refresh_token.strip()):
|
||||
return None
|
||||
if not (isinstance(access_token, str) and access_token.strip()):
|
||||
return None
|
||||
return payload
|
||||
|
||||
|
||||
def _try_import_shared_nous_state(
|
||||
*,
|
||||
timeout_seconds: float = 15.0,
|
||||
min_key_ttl_seconds: int = 5 * 60,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Attempt to rehydrate Nous OAuth state from the shared store.
|
||||
|
||||
Reads the shared file (if present), runs a forced refresh+mint using
|
||||
the stored refresh_token to produce a fresh access_token + agent_key
|
||||
scoped to this profile, and returns the full auth_state dict ready
|
||||
for ``persist_nous_credentials()``.
|
||||
|
||||
Returns ``None`` when no shared state is available or the rehydrate
|
||||
fails for any reason (expired refresh_token, portal unreachable,
|
||||
etc.) — caller should then fall through to the normal device-code
|
||||
flow.
|
||||
"""
|
||||
shared = _read_shared_nous_state()
|
||||
if not shared:
|
||||
return None
|
||||
|
||||
# Build a full state dict so refresh_nous_oauth_from_state has every
|
||||
# field it needs. force_refresh=True gets us a fresh access_token
|
||||
# for this profile; force_mint=True gets us a fresh agent_key.
|
||||
state: Dict[str, Any] = {
|
||||
"access_token": shared.get("access_token"),
|
||||
"refresh_token": shared.get("refresh_token"),
|
||||
"client_id": shared.get("client_id") or DEFAULT_NOUS_CLIENT_ID,
|
||||
"portal_base_url": shared.get("portal_base_url") or DEFAULT_NOUS_PORTAL_URL,
|
||||
"inference_base_url": shared.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL,
|
||||
"token_type": shared.get("token_type") or "Bearer",
|
||||
"scope": shared.get("scope") or DEFAULT_NOUS_SCOPE,
|
||||
"obtained_at": shared.get("obtained_at"),
|
||||
"expires_at": shared.get("expires_at"),
|
||||
"agent_key": None,
|
||||
"agent_key_expires_at": None,
|
||||
"tls": {"insecure": False, "ca_bundle": None},
|
||||
}
|
||||
|
||||
try:
|
||||
refreshed = refresh_nous_oauth_from_state(
|
||||
state,
|
||||
min_key_ttl_seconds=min_key_ttl_seconds,
|
||||
timeout_seconds=timeout_seconds,
|
||||
force_refresh=True,
|
||||
force_mint=True,
|
||||
)
|
||||
except AuthError as exc:
|
||||
_oauth_trace(
|
||||
"nous_shared_import_failed",
|
||||
error_type=type(exc).__name__,
|
||||
error_code=getattr(exc, "code", None),
|
||||
)
|
||||
logger.debug("Shared Nous import failed: %s", exc)
|
||||
return None
|
||||
except Exception as exc:
|
||||
_oauth_trace(
|
||||
"nous_shared_import_failed",
|
||||
error_type=type(exc).__name__,
|
||||
)
|
||||
logger.debug("Shared Nous import failed: %s", exc)
|
||||
return None
|
||||
|
||||
return refreshed
|
||||
|
||||
|
||||
def _refresh_access_token(
|
||||
*,
|
||||
client: httpx.Client,
|
||||
@@ -2991,6 +3356,12 @@ def persist_nous_credentials(
|
||||
_save_provider_state(auth_store, "nous", state)
|
||||
_save_auth_store(auth_store)
|
||||
|
||||
# Mirror to the shared store so a new profile can one-tap import
|
||||
# these credentials via `hermes auth add nous --type oauth`. Best-
|
||||
# effort: any I/O failure is logged and swallowed (the per-profile
|
||||
# auth.json is still the source of truth).
|
||||
_write_shared_nous_state(state)
|
||||
|
||||
pool = load_pool("nous")
|
||||
return next(
|
||||
(e for e in pool.entries() if e.source == NOUS_DEVICE_CODE_SOURCE),
|
||||
@@ -3059,6 +3430,11 @@ def resolve_nous_runtime_credentials(
|
||||
refresh_token_fp=_token_fingerprint(state.get("refresh_token")),
|
||||
access_token_fp=_token_fingerprint(state.get("access_token")),
|
||||
)
|
||||
# Mirror post-refresh state to the shared store so sibling
|
||||
# profiles don't hold stale refresh_tokens after rotation.
|
||||
# Best-effort — any failure is logged and swallowed inside
|
||||
# _write_shared_nous_state.
|
||||
_write_shared_nous_state(state)
|
||||
|
||||
verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
|
||||
timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
|
||||
@@ -3653,7 +4029,7 @@ def _update_config_for_provider(
|
||||
|
||||
config["model"] = model_cfg
|
||||
|
||||
config_path.write_text(yaml.safe_dump(config, sort_keys=False))
|
||||
atomic_yaml_write(config_path, config, sort_keys=False)
|
||||
return config_path
|
||||
|
||||
|
||||
@@ -3712,7 +4088,7 @@ def _reset_config_provider() -> Path:
|
||||
model["provider"] = "auto"
|
||||
if "base_url" in model:
|
||||
model["base_url"] = OPENROUTER_BASE_URL
|
||||
config_path.write_text(yaml.safe_dump(config, sort_keys=False))
|
||||
atomic_yaml_write(config_path, config, sort_keys=False)
|
||||
return config_path
|
||||
|
||||
|
||||
@@ -3840,7 +4216,7 @@ def _prompt_model_selection(
|
||||
clear_screen=False,
|
||||
title=effective_title,
|
||||
)
|
||||
idx = menu.show()
|
||||
idx: int | None = menu.show() # ty:ignore[invalid-assignment] - TerminalMenu.show() is always `int | None` when multi_select is False / not provided.
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
if idx is None:
|
||||
@@ -4283,7 +4659,8 @@ def _minimax_oauth_login(
|
||||
print(f"Portal: {portal_base_url}")
|
||||
|
||||
with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
|
||||
headers={"Accept": "application/json"}) as client:
|
||||
headers={"Accept": "application/json"},
|
||||
follow_redirects=True) as client:
|
||||
code_data = _minimax_request_user_code(
|
||||
client, portal_base_url=portal_base_url,
|
||||
client_id=pconfig.client_id,
|
||||
@@ -4360,7 +4737,8 @@ def _refresh_minimax_oauth_state(
|
||||
return state
|
||||
|
||||
portal_base_url = state["portal_base_url"]
|
||||
with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client:
|
||||
with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
|
||||
follow_redirects=True) as client:
|
||||
response = client.post(
|
||||
f"{portal_base_url}/oauth/token",
|
||||
data={
|
||||
@@ -4598,17 +4976,47 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
|
||||
try:
|
||||
auth_state = _nous_device_code_login(
|
||||
portal_base_url=getattr(args, "portal_url", None),
|
||||
inference_base_url=getattr(args, "inference_url", None),
|
||||
client_id=getattr(args, "client_id", None) or pconfig.client_id,
|
||||
scope=getattr(args, "scope", None) or pconfig.scope,
|
||||
open_browser=not getattr(args, "no_browser", False),
|
||||
timeout_seconds=timeout_seconds,
|
||||
insecure=insecure,
|
||||
ca_bundle=ca_bundle,
|
||||
min_key_ttl_seconds=5 * 60,
|
||||
)
|
||||
auth_state = None
|
||||
|
||||
# Codex-style auto-import: before launching a fresh device-code
|
||||
# flow, check the shared store for an existing Nous credential
|
||||
# from any other profile. If present, offer to rehydrate it.
|
||||
shared = _read_shared_nous_state()
|
||||
if shared:
|
||||
try:
|
||||
shared_path = _nous_shared_store_path()
|
||||
except RuntimeError:
|
||||
shared_path = None
|
||||
print()
|
||||
if shared_path:
|
||||
print(f"Found existing Nous OAuth credentials at {shared_path}")
|
||||
else:
|
||||
print("Found existing shared Nous OAuth credentials")
|
||||
try:
|
||||
do_import = input("Import these credentials? [Y/n]: ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
do_import = "y"
|
||||
if do_import in ("", "y", "yes"):
|
||||
print("Rehydrating Nous session from shared credentials...")
|
||||
auth_state = _try_import_shared_nous_state(
|
||||
timeout_seconds=timeout_seconds,
|
||||
min_key_ttl_seconds=5 * 60,
|
||||
)
|
||||
if auth_state is None:
|
||||
print("Could not refresh shared credentials — falling back to device-code login.")
|
||||
|
||||
if auth_state is None:
|
||||
auth_state = _nous_device_code_login(
|
||||
portal_base_url=getattr(args, "portal_url", None),
|
||||
inference_base_url=getattr(args, "inference_url", None),
|
||||
client_id=getattr(args, "client_id", None) or pconfig.client_id,
|
||||
scope=getattr(args, "scope", None) or pconfig.scope,
|
||||
open_browser=not getattr(args, "no_browser", False),
|
||||
timeout_seconds=timeout_seconds,
|
||||
insecure=insecure,
|
||||
ca_bundle=ca_bundle,
|
||||
min_key_ttl_seconds=5 * 60,
|
||||
)
|
||||
|
||||
inference_base_url = auth_state["inference_base_url"]
|
||||
|
||||
@@ -4625,6 +5033,11 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
_save_provider_state(auth_store, "nous", auth_state)
|
||||
saved_to = _save_auth_store(auth_store)
|
||||
|
||||
# Mirror to the shared store so other profiles can one-tap import
|
||||
# these credentials. Best-effort: any I/O failure is logged and
|
||||
# swallowed inside the helper.
|
||||
_write_shared_nous_state(auth_state)
|
||||
|
||||
print()
|
||||
print("Login successful!")
|
||||
print(f" Auth state: {saved_to}")
|
||||
|
||||
@@ -245,6 +245,47 @@ def auth_add_command(args) -> None:
|
||||
return
|
||||
|
||||
if provider == "nous":
|
||||
# Codex-style auto-import: if a shared Nous credential lives at
|
||||
# ~/.hermes/shared/nous_auth.json (written by any previous
|
||||
# successful login), offer to import it instead of running the
|
||||
# full device-code flow. This makes `hermes --profile <name>
|
||||
# auth add nous --type oauth` a one-tap operation for users who
|
||||
# run multiple profiles.
|
||||
shared = auth_mod._read_shared_nous_state()
|
||||
if shared:
|
||||
try:
|
||||
path = auth_mod._nous_shared_store_path()
|
||||
except RuntimeError:
|
||||
path = None
|
||||
print()
|
||||
if path:
|
||||
print(f"Found existing Nous OAuth credentials at {path}")
|
||||
else:
|
||||
print("Found existing shared Nous OAuth credentials")
|
||||
try:
|
||||
do_import = input("Import these credentials? [Y/n]: ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
do_import = "y"
|
||||
if do_import in ("", "y", "yes"):
|
||||
print("Rehydrating Nous session from shared credentials...")
|
||||
rehydrated = auth_mod._try_import_shared_nous_state(
|
||||
timeout_seconds=getattr(args, "timeout", None) or 15.0,
|
||||
min_key_ttl_seconds=max(
|
||||
60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))
|
||||
),
|
||||
)
|
||||
if rehydrated is not None:
|
||||
custom_label = (getattr(args, "label", None) or "").strip() or None
|
||||
entry = auth_mod.persist_nous_credentials(rehydrated, label=custom_label)
|
||||
shown_label = entry.label if entry is not None else label_from_token(
|
||||
rehydrated.get("access_token", ""), _oauth_default_label(provider, 1),
|
||||
)
|
||||
print(f'Imported {provider} OAuth credentials: "{shown_label}"')
|
||||
return
|
||||
# Rehydrate failed (expired refresh_token, portal down, etc.)
|
||||
# — fall through to device-code flow.
|
||||
print("Could not refresh shared credentials — falling back to device-code login.")
|
||||
|
||||
creds = auth_mod._nous_device_code_login(
|
||||
portal_base_url=getattr(args, "portal_url", None),
|
||||
inference_base_url=getattr(args, "inference_url", None),
|
||||
|
||||
+15
-2
@@ -61,6 +61,9 @@ _EXCLUDED_NAMES = {
|
||||
"cron.pid",
|
||||
}
|
||||
|
||||
# zipfile.open() drops Unix mode bits on extract; restore tightens these to 0600.
|
||||
_SECRET_FILE_NAMES = {".env", "auth.json", "state.db"}
|
||||
|
||||
|
||||
def _should_exclude(rel_path: Path) -> bool:
|
||||
"""Return True if *rel_path* (relative to hermes root) should be skipped."""
|
||||
@@ -381,6 +384,8 @@ def run_import(args) -> None:
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
with zf.open(member) as src, open(target, "wb") as dst:
|
||||
dst.write(src.read())
|
||||
if target.name in _SECRET_FILE_NAMES:
|
||||
os.chmod(target, 0o600)
|
||||
restored += 1
|
||||
except (PermissionError, OSError) as exc:
|
||||
errors.append(f" {rel}: {exc}")
|
||||
@@ -788,9 +793,17 @@ def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
|
||||
Returns the number of files deleted. Only touches files matching
|
||||
``pre-update-*.zip`` so hand-made zips dropped in the same directory
|
||||
are never touched.
|
||||
|
||||
``keep`` is floored to 1 because this helper is only called immediately
|
||||
after a fresh backup is written: deleting that backup right after the
|
||||
user paid the disk/CPU cost to create it would leave them worse off
|
||||
than no backup at all (and the wrapper in ``main.py`` would still print
|
||||
a misleading ``Saved: <path>`` line for a file that no longer exists).
|
||||
Operators who genuinely don't want a backup should set
|
||||
``updates.pre_update_backup: false`` in config — that gates creation.
|
||||
"""
|
||||
if keep < 0:
|
||||
keep = 0
|
||||
if keep < 1:
|
||||
keep = 1
|
||||
if not backup_dir.exists():
|
||||
return 0
|
||||
|
||||
|
||||
@@ -0,0 +1,244 @@
|
||||
"""`hermes checkpoints` CLI subcommand.
|
||||
|
||||
Gives users direct visibility and control over the filesystem checkpoint
|
||||
store at ``~/.hermes/checkpoints/``. Actions:
|
||||
|
||||
hermes checkpoints # same as `status`
|
||||
hermes checkpoints status # total size, project count, breakdown
|
||||
hermes checkpoints list # per-project checkpoint counts + workdir
|
||||
hermes checkpoints prune [opts] # force a sweep (ignores the 24h marker)
|
||||
hermes checkpoints clear [-f] # nuke the entire base (asks first)
|
||||
hermes checkpoints clear-legacy # delete just the legacy-* archives
|
||||
|
||||
Examples::
|
||||
|
||||
hermes checkpoints
|
||||
hermes checkpoints prune --retention-days 3 --max-size-mb 200
|
||||
hermes checkpoints clear -f
|
||||
|
||||
None of these require the agent to be running. Safe to call any time.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
def _fmt_bytes(n: int) -> str:
|
||||
units = ("B", "KB", "MB", "GB", "TB")
|
||||
size = float(n or 0)
|
||||
for unit in units:
|
||||
if size < 1024 or unit == units[-1]:
|
||||
if unit == "B":
|
||||
return f"{int(size)} {unit}"
|
||||
return f"{size:.1f} {unit}"
|
||||
size /= 1024
|
||||
return f"{size:.1f} TB"
|
||||
|
||||
|
||||
def _fmt_ts(ts: Any) -> str:
|
||||
try:
|
||||
return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M")
|
||||
except (TypeError, ValueError):
|
||||
return "—"
|
||||
|
||||
|
||||
def _fmt_age(ts: Any) -> str:
|
||||
try:
|
||||
age = time.time() - float(ts)
|
||||
except (TypeError, ValueError):
|
||||
return "—"
|
||||
if age < 0:
|
||||
return "now"
|
||||
if age < 60:
|
||||
return f"{int(age)}s ago"
|
||||
if age < 3600:
|
||||
return f"{int(age / 60)}m ago"
|
||||
if age < 86400:
|
||||
return f"{int(age / 3600)}h ago"
|
||||
return f"{int(age / 86400)}d ago"
|
||||
|
||||
|
||||
def cmd_status(args: argparse.Namespace) -> int:
|
||||
from tools.checkpoint_manager import store_status
|
||||
|
||||
info = store_status()
|
||||
base = info["base"]
|
||||
print(f"Checkpoint base: {base}")
|
||||
print(f"Total size: {_fmt_bytes(info['total_size_bytes'])}")
|
||||
print(f" store/ {_fmt_bytes(info['store_size_bytes'])}")
|
||||
print(f" legacy-* {_fmt_bytes(info['legacy_size_bytes'])}")
|
||||
print(f"Projects: {info['project_count']}")
|
||||
|
||||
projects = sorted(
|
||||
info["projects"],
|
||||
key=lambda p: (p.get("last_touch") or 0),
|
||||
reverse=True,
|
||||
)
|
||||
if projects:
|
||||
print()
|
||||
print(f" {'WORKDIR':<60} {'COMMITS':>7} {'LAST TOUCH':>12} STATE")
|
||||
for p in projects[: args.limit if hasattr(args, "limit") and args.limit else 20]:
|
||||
wd = p.get("workdir") or "(unknown)"
|
||||
if len(wd) > 60:
|
||||
wd = "…" + wd[-59:]
|
||||
exists = p.get("exists")
|
||||
state = "live" if exists else "orphan"
|
||||
commits = p.get("commits", 0)
|
||||
last = _fmt_age(p.get("last_touch"))
|
||||
print(f" {wd:<60} {commits:>7} {last:>12} {state}")
|
||||
|
||||
legacy = info.get("legacy_archives", [])
|
||||
if legacy:
|
||||
print()
|
||||
print(f"Legacy archives ({len(legacy)}):")
|
||||
for arch in sorted(legacy, key=lambda a: a.get("mtime", 0), reverse=True):
|
||||
print(f" {arch['name']:<40} {_fmt_bytes(arch['size_bytes']):>10}")
|
||||
print()
|
||||
print("Clear with: hermes checkpoints clear-legacy")
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_list(args: argparse.Namespace) -> int:
|
||||
# `list` is just a terser status — already covered.
|
||||
return cmd_status(args)
|
||||
|
||||
|
||||
def cmd_prune(args: argparse.Namespace) -> int:
|
||||
from tools.checkpoint_manager import prune_checkpoints
|
||||
|
||||
retention_days = args.retention_days
|
||||
max_size_mb = args.max_size_mb
|
||||
|
||||
print("Pruning checkpoint store…")
|
||||
print(f" retention_days: {retention_days}")
|
||||
print(f" delete_orphans: {not args.keep_orphans}")
|
||||
print(f" max_total_size_mb: {max_size_mb}")
|
||||
print()
|
||||
|
||||
result = prune_checkpoints(
|
||||
retention_days=retention_days,
|
||||
delete_orphans=not args.keep_orphans,
|
||||
max_total_size_mb=max_size_mb,
|
||||
)
|
||||
print(f"Scanned: {result['scanned']}")
|
||||
print(f"Deleted orphan: {result['deleted_orphan']}")
|
||||
print(f"Deleted stale: {result['deleted_stale']}")
|
||||
print(f"Errors: {result['errors']}")
|
||||
print(f"Bytes reclaimed: {_fmt_bytes(result['bytes_freed'])}")
|
||||
return 0
|
||||
|
||||
|
||||
def _confirm(prompt: str) -> bool:
|
||||
try:
|
||||
resp = input(f"{prompt} [y/N]: ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print()
|
||||
return False
|
||||
return resp in ("y", "yes")
|
||||
|
||||
|
||||
def cmd_clear(args: argparse.Namespace) -> int:
|
||||
from tools.checkpoint_manager import CHECKPOINT_BASE, clear_all, store_status
|
||||
|
||||
info = store_status()
|
||||
if info["total_size_bytes"] == 0 and not Path(CHECKPOINT_BASE).exists():
|
||||
print("Nothing to clear — checkpoint base does not exist.")
|
||||
return 0
|
||||
|
||||
print(f"This will delete the ENTIRE checkpoint base at {info['base']}")
|
||||
print(f" size: {_fmt_bytes(info['total_size_bytes'])}")
|
||||
print(f" projects: {info['project_count']}")
|
||||
print(f" legacy dirs: {len(info.get('legacy_archives', []))}")
|
||||
print()
|
||||
print("All /rollback history for every working directory will be lost.")
|
||||
if not args.force and not _confirm("Proceed?"):
|
||||
print("Aborted.")
|
||||
return 1
|
||||
|
||||
result = clear_all()
|
||||
if result["deleted"]:
|
||||
print(f"Cleared. Reclaimed {_fmt_bytes(result['bytes_freed'])}.")
|
||||
return 0
|
||||
print("Could not clear checkpoint base (see logs).")
|
||||
return 2
|
||||
|
||||
|
||||
def cmd_clear_legacy(args: argparse.Namespace) -> int:
|
||||
from tools.checkpoint_manager import clear_legacy, store_status
|
||||
|
||||
info = store_status()
|
||||
legacy = info.get("legacy_archives", [])
|
||||
if not legacy:
|
||||
print("No legacy archives to clear.")
|
||||
return 0
|
||||
|
||||
total = sum(a.get("size_bytes", 0) for a in legacy)
|
||||
print(f"Found {len(legacy)} legacy archive(s), total {_fmt_bytes(total)}:")
|
||||
for arch in legacy:
|
||||
print(f" {arch['name']:<40} {_fmt_bytes(arch['size_bytes']):>10}")
|
||||
print()
|
||||
print("Legacy archives hold pre-v2 per-project shadow repos, moved aside")
|
||||
print("during the single-store migration. Delete when you're confident")
|
||||
print("you don't need the old /rollback history.")
|
||||
if not args.force and not _confirm("Delete all legacy archives?"):
|
||||
print("Aborted.")
|
||||
return 1
|
||||
|
||||
result = clear_legacy()
|
||||
print(f"Deleted {result['deleted']} archive(s), reclaimed {_fmt_bytes(result['bytes_freed'])}.")
|
||||
return 0
|
||||
|
||||
|
||||
def register_cli(parser: argparse.ArgumentParser) -> None:
|
||||
"""Wire subcommands onto the ``hermes checkpoints`` parser."""
|
||||
parser.set_defaults(func=cmd_status) # bare `hermes checkpoints` → status
|
||||
subs = parser.add_subparsers(dest="checkpoints_command", metavar="COMMAND")
|
||||
|
||||
p_status = subs.add_parser(
|
||||
"status",
|
||||
help="Show total size, project count, and per-project breakdown",
|
||||
)
|
||||
p_status.add_argument("--limit", type=int, default=20,
|
||||
help="Max projects to list (default 20)")
|
||||
p_status.set_defaults(func=cmd_status)
|
||||
|
||||
p_list = subs.add_parser(
|
||||
"list",
|
||||
help="Alias for 'status'",
|
||||
)
|
||||
p_list.add_argument("--limit", type=int, default=20)
|
||||
p_list.set_defaults(func=cmd_list)
|
||||
|
||||
p_prune = subs.add_parser(
|
||||
"prune",
|
||||
help="Delete orphan/stale checkpoints and GC the store",
|
||||
)
|
||||
p_prune.add_argument("--retention-days", type=int, default=7,
|
||||
help="Drop projects whose last_touch is older than N days (default 7)")
|
||||
p_prune.add_argument("--max-size-mb", type=int, default=500,
|
||||
help="After orphan/stale prune, drop oldest commits "
|
||||
"per project until total size <= this (default 500)")
|
||||
p_prune.add_argument("--keep-orphans", action="store_true",
|
||||
help="Skip deleting projects whose workdir no longer exists")
|
||||
p_prune.set_defaults(func=cmd_prune)
|
||||
|
||||
p_clear = subs.add_parser(
|
||||
"clear",
|
||||
help="Delete the entire checkpoint base (all /rollback history)",
|
||||
)
|
||||
p_clear.add_argument("-f", "--force", action="store_true",
|
||||
help="Skip confirmation prompt")
|
||||
p_clear.set_defaults(func=cmd_clear)
|
||||
|
||||
p_legacy = subs.add_parser(
|
||||
"clear-legacy",
|
||||
help="Delete only the legacy-<ts>/ archives from v1 migration",
|
||||
)
|
||||
p_legacy.add_argument("-f", "--force", action="store_true",
|
||||
help="Skip confirmation prompt")
|
||||
p_legacy.set_defaults(func=cmd_clear_legacy)
|
||||
+9
-1
@@ -235,6 +235,9 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
|
||||
"""
|
||||
findings: list[tuple[Path, str]] = []
|
||||
|
||||
if not source_dir.exists():
|
||||
return findings
|
||||
|
||||
# Direct state files in the root
|
||||
for name in ("todo.json", "sessions", "logs"):
|
||||
candidate = source_dir / name
|
||||
@@ -243,7 +246,12 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
|
||||
findings.append((candidate, f"Root {kind}: {name}"))
|
||||
|
||||
# State files inside workspace directories
|
||||
for child in sorted(source_dir.iterdir()):
|
||||
try:
|
||||
children = sorted(source_dir.iterdir())
|
||||
except OSError:
|
||||
return findings
|
||||
|
||||
for child in children:
|
||||
if not child.is_dir() or child.name.startswith("."):
|
||||
continue
|
||||
# Check for workspace-like subdirectories
|
||||
|
||||
+184
-62
@@ -10,6 +10,7 @@ To add an alias: set ``aliases=("short",)`` on the existing ``CommandDef``.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
@@ -19,6 +20,10 @@ from collections.abc import Callable, Mapping
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from utils import is_truthy_value
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# prompt_toolkit is an optional CLI dependency — only needed for
|
||||
# SlashCommandCompleter and SlashCommandAutoSuggest. Gateway and test
|
||||
# environments that lack it must still be able to import this module
|
||||
@@ -59,7 +64,9 @@ class CommandDef:
|
||||
COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Session
|
||||
CommandDef("new", "Start a new session (fresh session ID + history)", "Session",
|
||||
aliases=("reset",)),
|
||||
aliases=("reset",), args_hint="[name]"),
|
||||
CommandDef("topic", "Enable or inspect Telegram DM topic sessions", "Session",
|
||||
gateway_only=True, args_hint="[off|help|session-id]"),
|
||||
CommandDef("clear", "Clear screen and start a new session", "Session",
|
||||
cli_only=True),
|
||||
CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
|
||||
@@ -93,6 +100,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
aliases=("q",), args_hint="<prompt>"),
|
||||
CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
|
||||
args_hint="<prompt>"),
|
||||
CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
|
||||
args_hint="[text | pause | resume | clear | status]"),
|
||||
CommandDef("status", "Show session info", "Session"),
|
||||
CommandDef("profile", "Show active profile name and home directory", "Info"),
|
||||
CommandDef("sethome", "Set this chat as the home channel", "Session",
|
||||
@@ -151,6 +160,11 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
|
||||
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
|
||||
"claim", "comment", "complete", "block", "unblock", "archive",
|
||||
"tail", "dispatch", "context", "init", "gc")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
@@ -366,7 +380,7 @@ def _resolve_config_gates() -> set[str]:
|
||||
else:
|
||||
val = None
|
||||
break
|
||||
if val:
|
||||
if is_truthy_value(val, default=False):
|
||||
result.add(cmd.name)
|
||||
return result
|
||||
|
||||
@@ -387,6 +401,11 @@ def _is_gateway_available(cmd: CommandDef, config_overrides: set[str] | None = N
|
||||
return False
|
||||
|
||||
|
||||
def _requires_argument(args_hint: str) -> bool:
|
||||
"""Return True when selecting a command without text would be incomplete."""
|
||||
return args_hint.strip().startswith("<")
|
||||
|
||||
|
||||
def gateway_help_lines() -> list[str]:
|
||||
"""Generate gateway help text lines from the registry."""
|
||||
overrides = _resolve_config_gates()
|
||||
@@ -443,7 +462,9 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
|
||||
Telegram command names cannot contain hyphens, so they are replaced with
|
||||
underscores. Aliases are skipped -- Telegram shows one menu entry per
|
||||
canonical command.
|
||||
canonical command. Commands that require arguments are skipped because
|
||||
selecting a Telegram BotCommand sends only ``/command`` and would execute
|
||||
an incomplete command.
|
||||
|
||||
Plugin-registered slash commands are included so plugins get native
|
||||
autocomplete in Telegram without touching core code.
|
||||
@@ -453,10 +474,14 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
for cmd in COMMAND_REGISTRY:
|
||||
if not _is_gateway_available(cmd, overrides):
|
||||
continue
|
||||
if _requires_argument(cmd.args_hint):
|
||||
continue
|
||||
tg_name = _sanitize_telegram_name(cmd.name)
|
||||
if tg_name:
|
||||
result.append((tg_name, cmd.description))
|
||||
for name, description, _args_hint in _iter_plugin_command_entries():
|
||||
for name, description, args_hint in _iter_plugin_command_entries():
|
||||
if _requires_argument(args_hint):
|
||||
continue
|
||||
tg_name = _sanitize_telegram_name(name)
|
||||
if tg_name:
|
||||
result.append((tg_name, description))
|
||||
@@ -490,9 +515,9 @@ def _sanitize_telegram_name(raw: str) -> str:
|
||||
|
||||
|
||||
def _clamp_command_names(
|
||||
entries: list[tuple[str, str]],
|
||||
entries: list[tuple[str, ...]],
|
||||
reserved: set[str],
|
||||
) -> list[tuple[str, str]]:
|
||||
) -> list[tuple[str, ...]]:
|
||||
"""Enforce 32-char command name limit with collision avoidance.
|
||||
|
||||
Both Telegram and Discord cap slash command names at 32 characters.
|
||||
@@ -500,10 +525,15 @@ def _clamp_command_names(
|
||||
(against *reserved* names or earlier entries in the same batch), the name is
|
||||
shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate.
|
||||
If all 10 digit slots are taken the entry is silently dropped.
|
||||
|
||||
Accepts tuples of any length >= 2. Extra elements beyond ``(name, desc)``
|
||||
(e.g. ``cmd_key``) are passed through unchanged, so callers can attach
|
||||
metadata that survives the rename.
|
||||
"""
|
||||
used: set[str] = set(reserved)
|
||||
result: list[tuple[str, str]] = []
|
||||
for name, desc in entries:
|
||||
result: list[tuple] = []
|
||||
for entry in entries:
|
||||
name, desc, *extra = entry
|
||||
if len(name) > _CMD_NAME_LIMIT:
|
||||
candidate = name[:_CMD_NAME_LIMIT]
|
||||
if candidate in used:
|
||||
@@ -519,7 +549,7 @@ def _clamp_command_names(
|
||||
if name in used:
|
||||
continue
|
||||
used.add(name)
|
||||
result.append((name, desc))
|
||||
result.append((name, desc, *extra))
|
||||
return result
|
||||
|
||||
|
||||
@@ -602,13 +632,26 @@ def _collect_gateway_skill_entries(
|
||||
try:
|
||||
from agent.skill_commands import get_skill_commands
|
||||
from tools.skills_tool import SKILLS_DIR
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
_skills_dir = str(SKILLS_DIR.resolve())
|
||||
_hub_dir = str((SKILLS_DIR / ".hub").resolve())
|
||||
_hub_dir = str((SKILLS_DIR / ".hub").resolve()).rstrip("/") + "/"
|
||||
# Build set of allowed directory prefixes: local skills dir + any
|
||||
# user-configured ``skills.external_dirs``. Ensure each prefix ends
|
||||
# with ``/`` so ``/my-skills`` does not also match ``/my-skills-extra``.
|
||||
# Without this widening, external skills are visible in
|
||||
# ``hermes skills list`` and the agent's ``/skill-name`` dispatch but
|
||||
# silently excluded from gateway slash menus (#8110).
|
||||
_allowed_prefixes = [_skills_dir.rstrip("/") + "/"]
|
||||
_allowed_prefixes.extend(
|
||||
str(d).rstrip("/") + "/" for d in get_external_skills_dirs()
|
||||
)
|
||||
skill_cmds = get_skill_commands()
|
||||
for cmd_key in sorted(skill_cmds):
|
||||
info = skill_cmds[cmd_key]
|
||||
skill_path = info.get("skill_md_path", "")
|
||||
if not skill_path.startswith(_skills_dir):
|
||||
if not skill_path:
|
||||
continue
|
||||
if not any(skill_path.startswith(prefix) for prefix in _allowed_prefixes):
|
||||
continue
|
||||
if skill_path.startswith(_hub_dir):
|
||||
continue
|
||||
@@ -626,17 +669,15 @@ def _collect_gateway_skill_entries(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Clamp names; _clamp_command_names works on (name, desc) pairs so we
|
||||
# need to zip/unzip.
|
||||
skill_pairs = [(n, d) for n, d, _ in skill_triples]
|
||||
key_by_pair = {(n, d): k for n, d, k in skill_triples}
|
||||
skill_pairs = _clamp_command_names(skill_pairs, reserved_names)
|
||||
# Clamp names; cmd_key is passed through as extra payload so it survives
|
||||
# any clamp-induced renames.
|
||||
skill_triples = _clamp_command_names(skill_triples, reserved_names)
|
||||
|
||||
# Skills fill remaining slots — only tier that gets trimmed
|
||||
remaining = max(0, max_slots - len(all_entries))
|
||||
hidden_count = max(0, len(skill_pairs) - remaining)
|
||||
for n, d in skill_pairs[:remaining]:
|
||||
all_entries.append((n, d, key_by_pair.get((n, d), "")))
|
||||
hidden_count = max(0, len(skill_triples) - remaining)
|
||||
for n, d, k in skill_triples[:remaining]:
|
||||
all_entries.append((n, d, k))
|
||||
|
||||
return all_entries[:max_slots], hidden_count
|
||||
|
||||
@@ -712,24 +753,40 @@ def discord_skill_commands(
|
||||
def discord_skill_commands_by_category(
|
||||
reserved_names: set[str],
|
||||
) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]:
|
||||
"""Return skill entries organized by category for Discord ``/skill`` subcommand groups.
|
||||
"""Return skill entries organized by category for Discord ``/skill`` autocomplete.
|
||||
|
||||
Skills whose directory is nested at least 2 levels under ``SKILLS_DIR``
|
||||
Skills whose directory is nested at least 2 levels under a scan root
|
||||
(e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level
|
||||
category. Root-level skills (e.g. ``dogfood/SKILL.md``) are returned as
|
||||
*uncategorized* — the caller should register them as direct subcommands
|
||||
of the ``/skill`` group.
|
||||
*uncategorized*.
|
||||
|
||||
The same filtering as :func:`discord_skill_commands` is applied: hub
|
||||
skills excluded, per-platform disabled excluded, names clamped.
|
||||
Scan roots include the local ``SKILLS_DIR`` **and** any configured
|
||||
``skills.external_dirs`` — matching the widened filter applied to the
|
||||
flat ``discord_skill_commands()`` collector in #18741. Without this
|
||||
parity, external-dir skills are visible via ``hermes skills list`` and
|
||||
the agent's ``/skill-name`` dispatch but silently absent from Discord's
|
||||
``/skill`` autocomplete.
|
||||
|
||||
Filtering mirrors :func:`discord_skill_commands`: hub skills excluded,
|
||||
per-platform disabled excluded, names clamped to 32 chars, descriptions
|
||||
clamped to 100 chars.
|
||||
|
||||
The legacy 25-group × 25-subcommand caps (from the old nested
|
||||
``/skill <cat> <name>`` layout) are **not** applied — the live caller
|
||||
(``_register_skill_group`` in ``gateway/platforms/discord.py``, refactored
|
||||
in PR #11580) flattens these results and feeds them into a single
|
||||
autocomplete callback, which scales to thousands of entries without any
|
||||
per-command payload concerns. ``hidden_count`` is retained in the return
|
||||
tuple for backward compatibility and still reports skills dropped for
|
||||
other reasons (32-char clamp collision vs a reserved name).
|
||||
|
||||
Returns:
|
||||
``(categories, uncategorized, hidden_count)``
|
||||
|
||||
- *categories*: ``{category_name: [(name, description, cmd_key), ...]}``
|
||||
- *uncategorized*: ``[(name, description, cmd_key), ...]``
|
||||
- *hidden_count*: skills dropped due to Discord group limits
|
||||
(25 subcommand groups, 25 subcommands per group)
|
||||
- *hidden_count*: skills dropped due to name clamp collisions
|
||||
against already-registered command names.
|
||||
"""
|
||||
from pathlib import Path as _P
|
||||
|
||||
@@ -743,14 +800,33 @@ def discord_skill_commands_by_category(
|
||||
# Collect raw skill data --------------------------------------------------
|
||||
categories: dict[str, list[tuple[str, str, str]]] = {}
|
||||
uncategorized: list[tuple[str, str, str]] = []
|
||||
_names_used: set[str] = set(reserved_names)
|
||||
# Map clamped-32-char-name → what it came from, so we can emit an
|
||||
# actionable warning on collision. Reserved (gateway-builtin) command
|
||||
# names are marked with a sentinel so the warning distinguishes
|
||||
# "skill collided with a reserved command" from "two skills collided
|
||||
# on the 32-char clamp" — the latter is the rename-worthy case.
|
||||
_names_used: dict[str, str] = {n: "<reserved>" for n in reserved_names}
|
||||
hidden = 0
|
||||
|
||||
try:
|
||||
from agent.skill_commands import get_skill_commands
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
from tools.skills_tool import SKILLS_DIR
|
||||
|
||||
_skills_dir = SKILLS_DIR.resolve()
|
||||
_hub_dir = (SKILLS_DIR / ".hub").resolve()
|
||||
# Build list of (resolved_root, is_local) tuples. Each external dir
|
||||
# becomes its own scan root for category derivation — a skill at
|
||||
# ``<external>/mlops/foo/SKILL.md`` is still categorized as "mlops".
|
||||
_scan_roots: list[_P] = [_skills_dir]
|
||||
try:
|
||||
for ext in get_external_skills_dirs():
|
||||
try:
|
||||
_scan_roots.append(_P(ext).resolve())
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
skill_cmds = get_skill_commands()
|
||||
|
||||
for cmd_key in sorted(skill_cmds):
|
||||
@@ -759,33 +835,72 @@ def discord_skill_commands_by_category(
|
||||
if not skill_path:
|
||||
continue
|
||||
sp = _P(skill_path).resolve()
|
||||
# Skip skills outside SKILLS_DIR or from the hub
|
||||
if not str(sp).startswith(str(_skills_dir)):
|
||||
continue
|
||||
# Hub skills are loaded via the skill hub, not surfaced as
|
||||
# slash commands.
|
||||
if str(sp).startswith(str(_hub_dir)):
|
||||
continue
|
||||
# Accept skill if it lives under any scan root; record the
|
||||
# matching root so we can derive the category correctly.
|
||||
matched_root: _P | None = None
|
||||
for root in _scan_roots:
|
||||
try:
|
||||
sp.relative_to(root)
|
||||
except ValueError:
|
||||
continue
|
||||
matched_root = root
|
||||
break
|
||||
if matched_root is None:
|
||||
continue
|
||||
|
||||
skill_name = info.get("name", "")
|
||||
if skill_name in _platform_disabled:
|
||||
continue
|
||||
|
||||
raw_name = cmd_key.lstrip("/")
|
||||
# Clamp to 32 chars (Discord limit)
|
||||
# Clamp to 32 chars (Discord per-command name limit)
|
||||
discord_name = raw_name[:32]
|
||||
if discord_name in _names_used:
|
||||
# Two skills whose first 32 chars are identical. One wins
|
||||
# (the first one seen, which is alphabetical because the
|
||||
# caller iterates ``sorted(skill_cmds)``); the other is
|
||||
# dropped from Discord's /skill autocomplete.
|
||||
#
|
||||
# Silently counting this as ``hidden`` (the old behavior)
|
||||
# meant skill authors had no way to discover the drop —
|
||||
# their skill just didn't appear in the picker. Emit a
|
||||
# WARNING naming both sides so the author can rename the
|
||||
# losing skill's frontmatter name to something with a
|
||||
# distinct 32-char prefix.
|
||||
prior = _names_used[discord_name]
|
||||
if prior == "<reserved>":
|
||||
logger.warning(
|
||||
"Discord /skill: %r (from %r) collides on its 32-char "
|
||||
"clamp with a reserved gateway command name %r — the "
|
||||
"skill will not appear in the /skill autocomplete. "
|
||||
"Rename the skill's frontmatter ``name:`` to differ "
|
||||
"in its first 32 chars.",
|
||||
discord_name, cmd_key, discord_name,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Discord /skill: %r and %r both clamp to %r on "
|
||||
"Discord's 32-char command-name limit — only %r "
|
||||
"will appear in the /skill autocomplete. Rename "
|
||||
"one skill's frontmatter ``name:`` to differ in "
|
||||
"its first 32 chars.",
|
||||
prior, cmd_key, discord_name, prior,
|
||||
)
|
||||
hidden += 1
|
||||
continue
|
||||
_names_used.add(discord_name)
|
||||
_names_used[discord_name] = cmd_key
|
||||
|
||||
desc = info.get("description", "")
|
||||
if len(desc) > 100:
|
||||
desc = desc[:97] + "..."
|
||||
|
||||
# Determine category from the relative path within SKILLS_DIR.
|
||||
# e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art")
|
||||
try:
|
||||
rel = sp.parent.relative_to(_skills_dir)
|
||||
except ValueError:
|
||||
continue
|
||||
# Determine category from the relative path within the matched
|
||||
# scan root. e.g. creative/ascii-art/SKILL.md → ("creative", ...)
|
||||
rel = sp.parent.relative_to(matched_root)
|
||||
parts = rel.parts
|
||||
if len(parts) >= 2:
|
||||
cat = parts[0]
|
||||
@@ -795,28 +910,7 @@ def discord_skill_commands_by_category(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------
|
||||
_MAX_GROUPS = 25
|
||||
_MAX_PER_GROUP = 25
|
||||
|
||||
trimmed_categories: dict[str, list[tuple[str, str, str]]] = {}
|
||||
group_count = 0
|
||||
for cat in sorted(categories):
|
||||
if group_count >= _MAX_GROUPS:
|
||||
hidden += len(categories[cat])
|
||||
continue
|
||||
entries = categories[cat][:_MAX_PER_GROUP]
|
||||
hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP)
|
||||
trimmed_categories[cat] = entries
|
||||
group_count += 1
|
||||
|
||||
# Uncategorized skills also count against the 25 top-level limit
|
||||
remaining_slots = _MAX_GROUPS - group_count
|
||||
if len(uncategorized) > remaining_slots:
|
||||
hidden += len(uncategorized) - remaining_slots
|
||||
uncategorized = uncategorized[:remaining_slots]
|
||||
|
||||
return trimmed_categories, uncategorized, hidden
|
||||
return categories, uncategorized, hidden
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -829,6 +923,13 @@ def discord_skill_commands_by_category(
|
||||
_SLACK_MAX_SLASH_COMMANDS = 50
|
||||
_SLACK_NAME_LIMIT = 32
|
||||
_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
|
||||
_SLACK_RESERVED_COMMANDS = frozenset({
|
||||
# Built-in Slack slash commands that cannot be registered by apps.
|
||||
# https://slack.com/help/articles/201259356-Use-built-in-slash-commands
|
||||
"me", "status", "away", "dnd", "shrug", "remind", "msg", "feed",
|
||||
"who", "collapse", "expand", "leave", "join", "open", "search",
|
||||
"topic", "mute", "pro", "shortcuts",
|
||||
})
|
||||
|
||||
|
||||
def _sanitize_slack_name(raw: str) -> str:
|
||||
@@ -855,6 +956,10 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
|
||||
documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
|
||||
Plugin-registered slash commands are included too.
|
||||
|
||||
Commands whose sanitized name collides with a Slack built-in
|
||||
(e.g. ``/status``, ``/me``, ``/join``) are silently skipped. Users
|
||||
can still reach them via ``/hermes <command>``.
|
||||
|
||||
Results are clamped to Slack's 50-command limit with duplicate-name
|
||||
avoidance. ``/hermes`` is always reserved as the first entry so the
|
||||
legacy ``/hermes <subcommand>`` form keeps working for anything that
|
||||
@@ -872,6 +977,8 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
|
||||
slack_name = _sanitize_slack_name(name)
|
||||
if not slack_name or slack_name in seen:
|
||||
return
|
||||
if slack_name in _SLACK_RESERVED_COMMANDS:
|
||||
return
|
||||
if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
|
||||
return
|
||||
# Slack description cap is 2000 chars; keep it short.
|
||||
@@ -1021,6 +1128,12 @@ class SlashCommandCompleter(Completer):
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
# Commands that open pickers when run without arguments.
|
||||
# These should NOT receive a trailing space in completions because:
|
||||
# - The TUI's submit handler applies completions on Enter if input differs
|
||||
# - Adding space makes "/model" → "/model " which blocks picker execution
|
||||
_PICKER_COMMANDS = frozenset({"model", "skin", "personality"})
|
||||
|
||||
@staticmethod
|
||||
def _completion_text(cmd_name: str, word: str) -> str:
|
||||
"""Return replacement text for a completion.
|
||||
@@ -1029,8 +1142,17 @@ class SlashCommandCompleter(Completer):
|
||||
returning ``help`` would be a no-op and prompt_toolkit suppresses the
|
||||
menu. Appending a trailing space keeps the dropdown visible and makes
|
||||
backspacing retrigger it naturally.
|
||||
|
||||
However, commands that open pickers (model, skin, personality) should
|
||||
NOT get a trailing space — the TUI would apply the completion on Enter
|
||||
and block the picker from opening.
|
||||
"""
|
||||
return f"{cmd_name} " if cmd_name == word else cmd_name
|
||||
if cmd_name != word:
|
||||
return cmd_name
|
||||
# Don't add space for picker commands — allows Enter to execute them
|
||||
if cmd_name in SlashCommandCompleter._PICKER_COMMANDS:
|
||||
return cmd_name
|
||||
return f"{cmd_name} "
|
||||
|
||||
@staticmethod
|
||||
def _extract_path_word(text: str) -> str | None:
|
||||
|
||||
+226
-22
@@ -400,7 +400,12 @@ DEFAULT_CONFIG = {
|
||||
# The gateway stops accepting new work, waits for running agents
|
||||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
#
|
||||
# 180s is calibrated for realistic in-flight agent turns: a typical
|
||||
# coding conversation mid-reasoning runs 60–150s per call, so a 60s
|
||||
# budget routinely interrupted legitimate work on /restart. Raise
|
||||
# further in config.yaml if you run very-long-reasoning models.
|
||||
"restart_drain_timeout": 180,
|
||||
# Max app-level retry attempts for API errors (connection drops,
|
||||
# provider timeouts, 5xx, etc.) before the agent surfaces the
|
||||
# failure. The OpenAI SDK already does its own low-level retries
|
||||
@@ -457,6 +462,7 @@ DEFAULT_CONFIG = {
|
||||
# remains available as a tool regardless of this setting — the routing
|
||||
# only controls how inbound user images are presented.
|
||||
"image_input_mode": "auto",
|
||||
"disabled_toolsets": [],
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
@@ -538,12 +544,25 @@ DEFAULT_CONFIG = {
|
||||
# via TERMINAL_LOCAL_PERSISTENT env var.
|
||||
"persistent_shell": True,
|
||||
},
|
||||
|
||||
|
||||
"web": {
|
||||
"backend": "", # shared fallback — applies to both search and extract
|
||||
"search_backend": "", # per-capability override for web_search (e.g. "searxng")
|
||||
"extract_backend": "", # per-capability override for web_extract (e.g. "native")
|
||||
},
|
||||
|
||||
"browser": {
|
||||
"inactivity_timeout": 120,
|
||||
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
# Browser engine for local mode. Passed as ``--engine <value>`` to
|
||||
# agent-browser v0.25.3+.
|
||||
# "auto" — use Chrome (default, don't pass --engine at all)
|
||||
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||
# "chrome" — explicitly request Chrome
|
||||
# Also settable via AGENT_BROWSER_ENGINE env var.
|
||||
"engine": "auto",
|
||||
"auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
@@ -561,21 +580,39 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
|
||||
# Filesystem checkpoints — automatic snapshots before destructive file ops.
|
||||
# When enabled, the agent takes a snapshot of the working directory once per
|
||||
# conversation turn (on first write_file/patch call). Use /rollback to restore.
|
||||
# When enabled, the agent takes a snapshot of the working directory once
|
||||
# per conversation turn (on first write_file/patch call). Use /rollback
|
||||
# to restore.
|
||||
#
|
||||
# Defaults changed in v2 (single shared shadow store, real pruning):
|
||||
# - enabled: True -> False (opt-in; most users never use /rollback)
|
||||
# - max_snapshots: 50 -> 20 (now actually enforced via ref rewrite)
|
||||
# - auto_prune: False -> True (orphans/stale pruned automatically)
|
||||
# Opt in via ``hermes chat --checkpoints`` or set enabled=True here.
|
||||
"checkpoints": {
|
||||
"enabled": True,
|
||||
"max_snapshots": 50, # Max checkpoints to keep per directory
|
||||
# Auto-maintenance: shadow repos accumulate forever under
|
||||
# ~/.hermes/checkpoints/ (one per cd'd working directory). Field
|
||||
# reports put the typical offender at 1000+ repos / ~12 GB. When
|
||||
# auto_prune is on, hermes sweeps at startup (at most once per
|
||||
# min_interval_hours) and deletes:
|
||||
# * orphan repos: HERMES_WORKDIR no longer exists on disk
|
||||
# * stale repos: newest mtime older than retention_days
|
||||
# Opt-in so users who rely on /rollback against long-ago sessions
|
||||
# never lose data silently.
|
||||
"auto_prune": False,
|
||||
"enabled": False,
|
||||
# Max checkpoints to keep per working directory. Pre-v2 this only
|
||||
# limited the `/rollback` listing; v2 actually rewrites the ref and
|
||||
# garbage-collects older commits.
|
||||
"max_snapshots": 20,
|
||||
# Hard ceiling on total ``~/.hermes/checkpoints/`` size (MB). When
|
||||
# exceeded, the oldest checkpoint per project is dropped in a
|
||||
# round-robin pass until total size falls under the cap.
|
||||
# 0 disables the size cap.
|
||||
"max_total_size_mb": 500,
|
||||
# Skip any single file larger than this when staging a checkpoint.
|
||||
# Prevents accidental snapshotting of datasets, model weights, and
|
||||
# other large generated assets. 0 disables the filter.
|
||||
"max_file_size_mb": 10,
|
||||
# Auto-maintenance: hermes sweeps the checkpoint base at startup
|
||||
# (at most once per ``min_interval_hours``) and:
|
||||
# * deletes project entries whose workdir no longer exists (orphan)
|
||||
# * deletes project entries whose last_touch is older than
|
||||
# ``retention_days``
|
||||
# * GCs the single shared store to reclaim unreachable objects
|
||||
# * enforces ``max_total_size_mb`` across remaining projects
|
||||
# * deletes ``legacy-*`` archives older than ``retention_days``
|
||||
"auto_prune": True,
|
||||
"retention_days": 7,
|
||||
"delete_orphans": True,
|
||||
"min_interval_hours": 24,
|
||||
@@ -606,6 +643,24 @@ DEFAULT_CONFIG = {
|
||||
"max_line_length": 2000,
|
||||
},
|
||||
|
||||
# Tool loop guardrails nudge models when they repeat failed or
|
||||
# non-progressing tool calls. Soft warnings are always-on by default;
|
||||
# hard stops are opt-in so interactive CLI/TUI sessions keep flowing.
|
||||
"tool_loop_guardrails": {
|
||||
"warnings_enabled": True,
|
||||
"hard_stop_enabled": False,
|
||||
"warn_after": {
|
||||
"exact_failure": 2,
|
||||
"same_tool_failure": 3,
|
||||
"idempotent_no_progress": 2,
|
||||
},
|
||||
"hard_stop_after": {
|
||||
"exact_failure": 5,
|
||||
"same_tool_failure": 8,
|
||||
"idempotent_no_progress": 5,
|
||||
},
|
||||
},
|
||||
|
||||
"compression": {
|
||||
"enabled": True,
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
@@ -620,6 +675,18 @@ DEFAULT_CONFIG = {
|
||||
"cache_ttl": "5m",
|
||||
},
|
||||
|
||||
# OpenRouter-specific settings.
|
||||
# response_cache: enable OpenRouter response caching (X-OpenRouter-Cache header).
|
||||
# When enabled, identical requests return cached responses for free (zero billing).
|
||||
# This is separate from Anthropic prompt caching and works alongside it.
|
||||
# See: https://openrouter.ai/docs/guides/features/response-caching
|
||||
# response_cache_ttl: how long cached responses remain valid, in seconds (1-86400).
|
||||
# Default 300 (5 minutes). Only used when response_cache is enabled.
|
||||
"openrouter": {
|
||||
"response_cache": True,
|
||||
"response_cache_ttl": 300,
|
||||
},
|
||||
|
||||
# AWS Bedrock provider configuration.
|
||||
# Only used when model.provider is "bedrock".
|
||||
"bedrock": {
|
||||
@@ -742,9 +809,19 @@ DEFAULT_CONFIG = {
|
||||
"show_reasoning": False,
|
||||
"streaming": False,
|
||||
"final_response_markdown": "strip", # render | strip | raw
|
||||
# Preserve recent classic CLI output across Ctrl+L, /redraw, and
|
||||
# terminal resize full-screen clears. Disable if a terminal emulator
|
||||
# behaves badly with replayed scrollback.
|
||||
"persistent_output": True,
|
||||
"persistent_output_max_lines": 200,
|
||||
"inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage)
|
||||
"show_cost": False, # Show $ cost in the status bar (off by default)
|
||||
"skin": "default",
|
||||
# UI language for static user-facing messages (approval prompts, a
|
||||
# handful of gateway slash-command replies). Does NOT affect agent
|
||||
# responses, log lines, tool outputs, or slash-command descriptions.
|
||||
# Supported: en, zh, ja, de, es, fr, tr, uk. Unknown values fall back to en.
|
||||
"language": "en",
|
||||
# TUI busy indicator style: kaomoji (default), emoji, unicode (braille
|
||||
# spinner), or ascii. Live-swappable via `/indicator <style>`.
|
||||
"tui_status_indicator": "kaomoji",
|
||||
@@ -756,6 +833,14 @@ DEFAULT_CONFIG = {
|
||||
"tool_progress_command": False, # Enable /verbose command in messaging gateway
|
||||
"tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead
|
||||
"tool_preview_length": 0, # Max chars for tool call previews (0 = no limit, show full paths/commands)
|
||||
# Auto-delete system-notice replies (e.g. "✨ New session started!",
|
||||
# "♻ Restarting gateway…", "⚡ Stopped…") after N seconds on platforms
|
||||
# that support message deletion (currently Telegram; other platforms
|
||||
# ignore and leave the message in place). Only affects slash-command
|
||||
# replies wrapped with gateway.platforms.base.EphemeralReply — agent
|
||||
# responses and content messages are never touched. Default 0
|
||||
# (disabled) preserves prior behavior.
|
||||
"ephemeral_system_ttl": 0,
|
||||
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
|
||||
# Gateway runtime-metadata footer appended to the FINAL message of a turn
|
||||
# (disabled by default to keep replies minimal). When enabled, renders
|
||||
@@ -765,6 +850,7 @@ DEFAULT_CONFIG = {
|
||||
"enabled": False,
|
||||
"fields": ["model", "context_pct", "cwd"], # Order shown; drop any to hide
|
||||
},
|
||||
"copy_shortcut": "auto", # "auto" (platform default) | "ctrl_c" | "ctrl_shift_c" | "disabled"
|
||||
},
|
||||
|
||||
# Web dashboard settings
|
||||
@@ -798,7 +884,7 @@ DEFAULT_CONFIG = {
|
||||
# Voices: alloy, echo, fable, onyx, nova, shimmer
|
||||
},
|
||||
"xai": {
|
||||
"voice_id": "eve",
|
||||
"voice_id": "eve", # or custom voice ID — see https://docs.x.ai/developers/model-capabilities/audio/custom-voices
|
||||
"language": "en",
|
||||
"sample_rate": 24000,
|
||||
"bit_rate": 128000,
|
||||
@@ -925,7 +1011,23 @@ DEFAULT_CONFIG = {
|
||||
# injected at the start of every API call for few-shot priming.
|
||||
# Never saved to sessions, logs, or trajectories.
|
||||
"prefill_messages_file": "",
|
||||
|
||||
|
||||
# Goals — persistent cross-turn goals (Ralph-style loop).
|
||||
# After every turn, a lightweight judge call asks the auxiliary model
|
||||
# whether the active /goal is satisfied by the assistant's last
|
||||
# response. If not, Hermes feeds a continuation prompt back into the
|
||||
# same session and keeps working until the goal is done, the turn
|
||||
# budget is exhausted, or the user pauses/clears it. Judge failures
|
||||
# fail OPEN (continue) so a flaky judge never wedges progress — the
|
||||
# turn budget is the real backstop.
|
||||
"goals": {
|
||||
# Max continuation turns before Hermes auto-pauses the goal and
|
||||
# asks the user to /goal resume. Protects against judge false
|
||||
# negatives (goal actually done but judge says continue) and
|
||||
# unbounded model spend on fuzzy / unachievable goals.
|
||||
"max_turns": 20,
|
||||
},
|
||||
|
||||
# Skills — external skill directories for sharing skills across tools/agents.
|
||||
# Each path is expanded (~, ${VAR}) and resolved. Read-only — skill creation
|
||||
# always goes to ~/.hermes/skills/.
|
||||
@@ -979,6 +1081,14 @@ DEFAULT_CONFIG = {
|
||||
# Archive a skill (move to skills/.archive/) after this many days
|
||||
# without use. Archived skills are recoverable — no auto-deletion.
|
||||
"archive_after_days": 90,
|
||||
# Pre-run backup: before every real curator pass (dry-run is
|
||||
# skipped), snapshot ~/.hermes/skills/ into
|
||||
# ~/.hermes/skills/.curator_backups/<utc-iso>/skills.tar.gz so the
|
||||
# user can roll back with `hermes curator rollback`.
|
||||
"backup": {
|
||||
"enabled": True,
|
||||
"keep": 5, # retain last N regular snapshots
|
||||
},
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
@@ -1104,6 +1214,24 @@ DEFAULT_CONFIG = {
|
||||
"max_parallel_jobs": None,
|
||||
},
|
||||
|
||||
# Kanban multi-agent coordination — controls the dispatcher loop that
|
||||
# spawns workers for ready tasks. The dispatcher ticks every N seconds
|
||||
# (default 60), reclaims stale claims, promotes dependency-satisfied
|
||||
# todos to ready, and fires `hermes -p <assignee> chat -q ...` for
|
||||
# each claimable ready task. One dispatcher per profile is sufficient;
|
||||
# running more than one on the same kanban.db will race for claims.
|
||||
"kanban": {
|
||||
# Run the dispatcher inside the gateway process. On by default —
|
||||
# the cost is ~300µs every `dispatch_interval_seconds` when idle,
|
||||
# and gateway is the supervisor users already have. Set to false
|
||||
# only if you run the dispatcher as a separate systemd unit or
|
||||
# don't want the gateway to spawn workers.
|
||||
"dispatch_in_gateway": True,
|
||||
# Seconds between dispatcher ticks (idle or not). Lower = snappier
|
||||
# pickup of newly-ready tasks; higher = less SQL pressure.
|
||||
"dispatch_interval_seconds": 60,
|
||||
},
|
||||
|
||||
# execute_code settings — controls the tool used for programmatic tool calls.
|
||||
"code_execution": {
|
||||
# Execution mode:
|
||||
@@ -1200,7 +1328,10 @@ DEFAULT_CONFIG = {
|
||||
# for a single update run.
|
||||
"pre_update_backup": False,
|
||||
# How many pre-update backup zips to retain. Older ones are pruned
|
||||
# automatically after each successful backup.
|
||||
# automatically after each successful backup. Values below 1 are
|
||||
# floored to 1 — the backup just created is always preserved. To
|
||||
# disable backups entirely, set ``pre_update_backup: false`` above
|
||||
# rather than ``backup_keep: 0``.
|
||||
"backup_keep": 5,
|
||||
},
|
||||
|
||||
@@ -1701,6 +1832,14 @@ OPTIONAL_ENV_VARS = {
|
||||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"SEARXNG_URL": {
|
||||
"description": "URL of your SearXNG instance for free self-hosted web search",
|
||||
"prompt": "SearXNG URL (e.g. http://localhost:8080)",
|
||||
"url": "https://searxng.github.io/searxng/",
|
||||
"tools": ["web_search"],
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
},
|
||||
"BROWSERBASE_API_KEY": {
|
||||
"description": "Browserbase API key for cloud browser (optional — local browser works without this)",
|
||||
"prompt": "Browserbase API key",
|
||||
@@ -1732,6 +1871,15 @@ OPTIONAL_ENV_VARS = {
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
},
|
||||
"AGENT_BROWSER_ENGINE": {
|
||||
"description": "Browser engine for local mode: auto (default Chrome), lightpanda (faster, no screenshots), chrome",
|
||||
"prompt": "Browser engine (auto/lightpanda/chrome)",
|
||||
"url": "https://github.com/vercel-labs/agent-browser",
|
||||
"tools": ["browser_navigate", "browser_snapshot", "browser_click", "browser_vision"],
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"CAMOFOX_URL": {
|
||||
"description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
|
||||
"prompt": "Camofox server URL",
|
||||
@@ -1810,7 +1958,7 @@ OPTIONAL_ENV_VARS = {
|
||||
"LINEAR_API_KEY": {
|
||||
"description": "Linear personal API key (used by the `linear` skill)",
|
||||
"prompt": "Linear API key",
|
||||
"url": "https://linear.app/settings/api",
|
||||
"url": "https://linear.app/settings/account/security",
|
||||
"password": True,
|
||||
"category": "skill",
|
||||
"advanced": True,
|
||||
@@ -2400,7 +2548,17 @@ def get_missing_skill_config_vars() -> List[Dict[str, Any]]:
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
all_vars = discover_all_skill_config_vars()
|
||||
try:
|
||||
all_vars = discover_all_skill_config_vars()
|
||||
except Exception as e:
|
||||
# A malformed SKILL.md, unreadable external skill dir, or similar
|
||||
# should never break `hermes update`. Skill-config prompting is a
|
||||
# post-migration nicety, not a blocker.
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(
|
||||
"discover_all_skill_config_vars failed: %s", e
|
||||
)
|
||||
return []
|
||||
if not all_vars:
|
||||
return []
|
||||
|
||||
@@ -3847,6 +4005,7 @@ _FALLBACK_COMMENT = """
|
||||
# kimi-coding-cn (KIMI_CN_API_KEY) — Kimi / Moonshot (China)
|
||||
# minimax (MINIMAX_API_KEY) — MiniMax
|
||||
# minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China)
|
||||
# bedrock (AWS IAM / boto3) — AWS Bedrock (Converse API)
|
||||
#
|
||||
# For custom OpenAI-compatible endpoints, add base_url and key_env.
|
||||
#
|
||||
@@ -3878,6 +4037,7 @@ _COMMENTED_SECTIONS = """
|
||||
# kimi-coding-cn (KIMI_CN_API_KEY) — Kimi / Moonshot (China)
|
||||
# minimax (MINIMAX_API_KEY) — MiniMax
|
||||
# minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China)
|
||||
# bedrock (AWS IAM / boto3) — AWS Bedrock (Converse API)
|
||||
#
|
||||
# For custom OpenAI-compatible endpoints, add base_url and key_env.
|
||||
#
|
||||
@@ -4579,7 +4739,9 @@ def set_config_value(key: str, value: str):
|
||||
"terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
|
||||
"terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"terminal.cwd": "TERMINAL_CWD",
|
||||
# terminal.cwd intentionally excluded — CLI resolves at runtime,
|
||||
# gateway bridges it in gateway/run.py. Persisting to .env causes
|
||||
# stale values to poison child processes.
|
||||
"terminal.timeout": "TERMINAL_TIMEOUT",
|
||||
"terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
"terminal.persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
@@ -4733,3 +4895,45 @@ def config_command(args):
|
||||
print(" hermes config path Show config file path")
|
||||
print(" hermes config env-path Show .env file path")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# ── Profile-driven env var injection ─────────────────────────────────────────
|
||||
# Any provider registered in providers/ with auth_type="api_key" automatically
|
||||
# gets its env_vars exposed in OPTIONAL_ENV_VARS without editing this file.
|
||||
# Runs once at import time.
|
||||
|
||||
_profile_env_vars_injected = False
|
||||
|
||||
|
||||
def _inject_profile_env_vars() -> None:
|
||||
"""Populate OPTIONAL_ENV_VARS from provider profiles not already listed.
|
||||
|
||||
Called once at module load time. Idempotent — repeated calls are no-ops.
|
||||
"""
|
||||
global _profile_env_vars_injected
|
||||
if _profile_env_vars_injected:
|
||||
return
|
||||
_profile_env_vars_injected = True
|
||||
try:
|
||||
from providers import list_providers
|
||||
for _pp in list_providers():
|
||||
if _pp.auth_type not in ("api_key",):
|
||||
continue
|
||||
for _var in _pp.env_vars:
|
||||
if _var in OPTIONAL_ENV_VARS:
|
||||
continue
|
||||
_is_key = not _var.endswith("_BASE_URL") and not _var.endswith("_URL")
|
||||
OPTIONAL_ENV_VARS[_var] = {
|
||||
"description": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL override'}",
|
||||
"prompt": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL (leave empty for default)'}",
|
||||
"url": _pp.signup_url or None,
|
||||
"password": _is_key,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# Eagerly inject so that OPTIONAL_ENV_VARS is fully populated at import time.
|
||||
_inject_profile_env_vars()
|
||||
|
||||
@@ -93,6 +93,8 @@ def cron_list(show_all: bool = False):
|
||||
script = job.get("script")
|
||||
if script:
|
||||
print(f" Script: {script}")
|
||||
if job.get("no_agent"):
|
||||
print(f" Mode: {color('no-agent', Colors.DIM)} (script stdout delivered directly)")
|
||||
workdir = job.get("workdir")
|
||||
if workdir:
|
||||
print(f" Workdir: {workdir}")
|
||||
@@ -172,6 +174,7 @@ def cron_create(args):
|
||||
skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
no_agent=getattr(args, "no_agent", False) or None,
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
@@ -184,6 +187,8 @@ def cron_create(args):
|
||||
job_data = result.get("job", {})
|
||||
if job_data.get("script"):
|
||||
print(f" Script: {job_data['script']}")
|
||||
if job_data.get("no_agent"):
|
||||
print(" Mode: no-agent (script stdout delivered directly)")
|
||||
if job_data.get("workdir"):
|
||||
print(f" Workdir: {job_data['workdir']}")
|
||||
print(f" Next run: {result['next_run_at']}")
|
||||
@@ -225,6 +230,7 @@ def cron_edit(args):
|
||||
skills=final_skills,
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
no_agent=getattr(args, "no_agent", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
@@ -240,6 +246,8 @@ def cron_edit(args):
|
||||
print(" Skills: none")
|
||||
if updated.get("script"):
|
||||
print(f" Script: {updated['script']}")
|
||||
if updated.get("no_agent"):
|
||||
print(" Mode: no-agent (script stdout delivered directly)")
|
||||
if updated.get("workdir"):
|
||||
print(f" Workdir: {updated['workdir']}")
|
||||
return 0
|
||||
|
||||
+323
-7
@@ -108,6 +108,49 @@ def _cmd_status(args) -> int:
|
||||
f"last_activity={last}"
|
||||
)
|
||||
|
||||
# Show top 5 most-active and least-active skills by activity_count
|
||||
# (use + view + patch). This is a different signal from
|
||||
# least-recently-active: activity_count reflects frequency,
|
||||
# last_activity_at reflects recency. A skill touched 30 times a year
|
||||
# ago is high-frequency but stale; a skill touched once yesterday is
|
||||
# recent but low-frequency. Both can matter.
|
||||
active_all = by_state.get("active", [])
|
||||
if active_all:
|
||||
most_active = sorted(
|
||||
active_all,
|
||||
key=lambda r: (r.get("activity_count") or 0, r.get("last_activity_at") or ""),
|
||||
reverse=True,
|
||||
)[:5]
|
||||
if most_active and (most_active[0].get("activity_count") or 0) > 0:
|
||||
print("\nmost active (top 5):")
|
||||
for r in most_active:
|
||||
last = _fmt_ts(r.get("last_activity_at"))
|
||||
print(
|
||||
f" {r['name']:40s} "
|
||||
f"activity={r.get('activity_count', 0):3d} "
|
||||
f"use={r.get('use_count', 0):3d} "
|
||||
f"view={r.get('view_count', 0):3d} "
|
||||
f"patches={r.get('patch_count', 0):3d} "
|
||||
f"last_activity={last}"
|
||||
)
|
||||
|
||||
least_active = sorted(
|
||||
active_all,
|
||||
key=lambda r: (r.get("activity_count") or 0, r.get("last_activity_at") or ""),
|
||||
)[:5]
|
||||
if least_active:
|
||||
print("\nleast active (top 5):")
|
||||
for r in least_active:
|
||||
last = _fmt_ts(r.get("last_activity_at"))
|
||||
print(
|
||||
f" {r['name']:40s} "
|
||||
f"activity={r.get('activity_count', 0):3d} "
|
||||
f"use={r.get('use_count', 0):3d} "
|
||||
f"view={r.get('view_count', 0):3d} "
|
||||
f"patches={r.get('patch_count', 0):3d} "
|
||||
f"last_activity={last}"
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
@@ -117,7 +160,11 @@ def _cmd_run(args) -> int:
|
||||
print("curator: disabled via config; enable with `curator.enabled: true`")
|
||||
return 1
|
||||
|
||||
print("curator: running review pass...")
|
||||
dry = bool(getattr(args, "dry_run", False))
|
||||
if dry:
|
||||
print("curator: running DRY-RUN (report only, no mutations)...")
|
||||
else:
|
||||
print("curator: running review pass...")
|
||||
|
||||
def _on_summary(msg: str) -> None:
|
||||
print(msg)
|
||||
@@ -125,17 +172,29 @@ def _cmd_run(args) -> int:
|
||||
result = curator.run_curator_review(
|
||||
on_summary=_on_summary,
|
||||
synchronous=bool(args.synchronous),
|
||||
dry_run=dry,
|
||||
)
|
||||
auto = result.get("auto_transitions", {})
|
||||
if auto:
|
||||
print(
|
||||
f"auto: checked={auto.get('checked', 0)} "
|
||||
f"stale={auto.get('marked_stale', 0)} "
|
||||
f"archived={auto.get('archived', 0)} "
|
||||
f"reactivated={auto.get('reactivated', 0)}"
|
||||
)
|
||||
if dry:
|
||||
print(
|
||||
f"auto (preview): {auto.get('checked', 0)} candidate skill(s) "
|
||||
"— no transitions applied in dry-run"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"auto: checked={auto.get('checked', 0)} "
|
||||
f"stale={auto.get('marked_stale', 0)} "
|
||||
f"archived={auto.get('archived', 0)} "
|
||||
f"reactivated={auto.get('reactivated', 0)}"
|
||||
)
|
||||
if not args.synchronous:
|
||||
print("llm pass running in background — check `hermes curator status` later")
|
||||
if dry:
|
||||
print(
|
||||
"dry-run: no changes applied. When the report lands, read it with "
|
||||
"`hermes curator status` and run `hermes curator run` (no flag) to apply."
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
@@ -186,6 +245,203 @@ def _cmd_restore(args) -> int:
|
||||
return 0 if ok else 1
|
||||
|
||||
|
||||
def _cmd_archive(args) -> int:
|
||||
"""Manually archive an agent-created skill. Refuses if pinned.
|
||||
|
||||
The auto-curator archives stale skills on its own schedule; this verb is
|
||||
for the user who wants to archive *now* without waiting for a run.
|
||||
"""
|
||||
from tools import skill_usage
|
||||
if skill_usage.get_record(args.skill).get("pinned"):
|
||||
print(
|
||||
f"curator: '{args.skill}' is pinned — unpin first with "
|
||||
f"`hermes curator unpin {args.skill}`"
|
||||
)
|
||||
return 1
|
||||
ok, msg = skill_usage.archive_skill(args.skill)
|
||||
print(f"curator: {msg}")
|
||||
return 0 if ok else 1
|
||||
|
||||
|
||||
def _idle_days(record: dict) -> Optional[int]:
|
||||
"""Days since the skill's last activity (view / use / patch).
|
||||
|
||||
Falls back to ``created_at`` so a skill that was authored but never used
|
||||
can still be pruned — otherwise never-touched skills would be immortal.
|
||||
Returns None only when both fields are missing or unparseable.
|
||||
"""
|
||||
ts = record.get("last_activity_at") or record.get("created_at")
|
||||
if not ts:
|
||||
return None
|
||||
try:
|
||||
dt = datetime.fromisoformat(str(ts))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return max(0, (datetime.now(timezone.utc) - dt).days)
|
||||
|
||||
|
||||
def _cmd_prune(args) -> int:
|
||||
"""Bulk-archive agent-created skills idle for >= N days.
|
||||
|
||||
Pinned skills are exempt. Already-archived skills are skipped. Default
|
||||
``--days 90`` matches a conservative read of the curator's own archive
|
||||
threshold; adjust with ``--days``. Use ``--dry-run`` to preview.
|
||||
"""
|
||||
from tools import skill_usage
|
||||
days = getattr(args, "days", 90)
|
||||
if days < 1:
|
||||
print(f"curator: --days must be >= 1 (got {days})", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
dry_run = bool(getattr(args, "dry_run", False))
|
||||
skip_confirm = bool(getattr(args, "yes", False))
|
||||
|
||||
candidates = []
|
||||
for r in skill_usage.agent_created_report():
|
||||
if r.get("pinned"):
|
||||
continue
|
||||
if r.get("state") == skill_usage.STATE_ARCHIVED:
|
||||
continue
|
||||
idle = _idle_days(r)
|
||||
if idle is None or idle < days:
|
||||
continue
|
||||
candidates.append((r["name"], idle))
|
||||
|
||||
if not candidates:
|
||||
print(f"curator: nothing to prune (no unpinned skills idle >= {days}d)")
|
||||
return 0
|
||||
|
||||
candidates.sort(key=lambda c: -c[1])
|
||||
print(f"curator: {len(candidates)} skill(s) idle >= {days}d:")
|
||||
for name, idle in candidates:
|
||||
print(f" {name:40s} idle {idle}d")
|
||||
|
||||
if dry_run:
|
||||
print("\n(dry run — no changes made)")
|
||||
return 0
|
||||
|
||||
if not skip_confirm:
|
||||
try:
|
||||
reply = input(f"\nArchive {len(candidates)} skill(s)? [y/N] ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\ncurator: aborted")
|
||||
return 1
|
||||
if reply not in ("y", "yes"):
|
||||
print("curator: aborted")
|
||||
return 1
|
||||
|
||||
archived = 0
|
||||
failures = []
|
||||
for name, _ in candidates:
|
||||
ok, msg = skill_usage.archive_skill(name)
|
||||
if ok:
|
||||
archived += 1
|
||||
else:
|
||||
failures.append((name, msg))
|
||||
|
||||
print(f"\ncurator: archived {archived}/{len(candidates)}")
|
||||
if failures:
|
||||
print("failures:")
|
||||
for name, msg in failures:
|
||||
print(f" {name}: {msg}")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_backup(args) -> int:
|
||||
"""Take a manual snapshot of the skills tree. Same mechanism as the
|
||||
automatic pre-run snapshot, just user-initiated."""
|
||||
from agent import curator_backup
|
||||
if not curator_backup.is_enabled():
|
||||
print(
|
||||
"curator: backups are disabled via config "
|
||||
"(`curator.backup.enabled: false`); re-enable to snapshot"
|
||||
)
|
||||
return 1
|
||||
reason = getattr(args, "reason", None) or "manual"
|
||||
snap = curator_backup.snapshot_skills(reason=reason)
|
||||
if snap is None:
|
||||
print("curator: snapshot failed — check logs (backup disabled or IO error)")
|
||||
return 1
|
||||
print(f"curator: snapshot created at ~/.hermes/skills/.curator_backups/{snap.name}")
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_rollback(args) -> int:
|
||||
"""Restore the skills tree from a snapshot. Defaults to newest.
|
||||
|
||||
``--list`` prints available snapshots and exits. ``--id <stamp>`` picks
|
||||
a specific one. Without ``-y``, prompts for confirmation. A safety
|
||||
snapshot of the current tree is always taken first, so rollbacks are
|
||||
themselves undoable.
|
||||
"""
|
||||
from agent import curator_backup
|
||||
|
||||
if getattr(args, "list", False):
|
||||
print(curator_backup.summarize_backups())
|
||||
return 0
|
||||
|
||||
backup_id = getattr(args, "backup_id", None)
|
||||
target_path = curator_backup._resolve_backup(backup_id)
|
||||
if target_path is None:
|
||||
rows = curator_backup.list_backups()
|
||||
if not rows:
|
||||
print(
|
||||
"curator: no snapshots exist yet. Take one with "
|
||||
"`hermes curator backup` or wait for the next curator run."
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"curator: no snapshot matching "
|
||||
f"{'id ' + repr(backup_id) if backup_id else 'your query'}."
|
||||
)
|
||||
print("Available:")
|
||||
print(curator_backup.summarize_backups())
|
||||
return 1
|
||||
|
||||
manifest = curator_backup._read_manifest(target_path)
|
||||
print(f"Rollback target: {target_path.name}")
|
||||
if manifest:
|
||||
print(f" reason: {manifest.get('reason', '?')}")
|
||||
print(f" created_at: {manifest.get('created_at', '?')}")
|
||||
print(f" skill files: {manifest.get('skill_files', '?')}")
|
||||
cron = manifest.get("cron_jobs") or {}
|
||||
if isinstance(cron, dict):
|
||||
if cron.get("backed_up"):
|
||||
print(
|
||||
f" cron jobs: {cron.get('jobs_count', 0)} "
|
||||
f"(will be restored for skill-link fields only)"
|
||||
)
|
||||
else:
|
||||
reason = cron.get("reason", "not captured")
|
||||
print(f" cron jobs: not in snapshot ({reason})")
|
||||
print(
|
||||
"\nThis will replace the current ~/.hermes/skills/ tree (a safety "
|
||||
"snapshot of the current state is taken first so this is undoable). "
|
||||
"Cron jobs that still exist will have their skills/skill fields "
|
||||
"restored from the snapshot; all other cron fields are left alone."
|
||||
)
|
||||
|
||||
if not getattr(args, "yes", False):
|
||||
try:
|
||||
ans = input("Proceed? [y/N] ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\ncancelled")
|
||||
return 1
|
||||
if ans not in ("y", "yes"):
|
||||
print("cancelled")
|
||||
return 1
|
||||
|
||||
ok, msg, _ = curator_backup.rollback(backup_id=target_path.name)
|
||||
if ok:
|
||||
print(f"curator: {msg}")
|
||||
return 0
|
||||
print(f"curator: rollback failed — {msg}")
|
||||
return 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# argparse wiring (called from hermes_cli.main)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -207,6 +463,11 @@ def register_cli(parent: argparse.ArgumentParser) -> None:
|
||||
"--sync", "--synchronous", dest="synchronous", action="store_true",
|
||||
help="Wait for the LLM review pass to finish (default: background thread)",
|
||||
)
|
||||
p_run.add_argument(
|
||||
"--dry-run", dest="dry_run", action="store_true",
|
||||
help="Report only — no state changes, no archives, no consolidation "
|
||||
"(use this to preview what curator would do)",
|
||||
)
|
||||
p_run.set_defaults(func=_cmd_run)
|
||||
|
||||
p_pause = subs.add_parser("pause", help="Pause the curator until resumed")
|
||||
@@ -227,6 +488,61 @@ def register_cli(parent: argparse.ArgumentParser) -> None:
|
||||
p_restore.add_argument("skill", help="Skill name")
|
||||
p_restore.set_defaults(func=_cmd_restore)
|
||||
|
||||
p_archive = subs.add_parser(
|
||||
"archive",
|
||||
help="Manually archive a skill (move to .archive/, excluded from prompt)",
|
||||
)
|
||||
p_archive.add_argument("skill", help="Skill name")
|
||||
p_archive.set_defaults(func=_cmd_archive)
|
||||
|
||||
p_prune = subs.add_parser(
|
||||
"prune",
|
||||
help="Bulk-archive agent-created skills idle for >= N days (default 90)",
|
||||
)
|
||||
p_prune.add_argument(
|
||||
"--days", type=int, default=90,
|
||||
help="Archive skills idle for at least N days (default: 90)",
|
||||
)
|
||||
p_prune.add_argument(
|
||||
"-y", "--yes", action="store_true",
|
||||
help="Skip the confirmation prompt",
|
||||
)
|
||||
p_prune.add_argument(
|
||||
"--dry-run", dest="dry_run", action="store_true",
|
||||
help="Show what would be archived without doing it",
|
||||
)
|
||||
p_prune.set_defaults(func=_cmd_prune)
|
||||
|
||||
p_backup = subs.add_parser(
|
||||
"backup",
|
||||
help="Take a manual tar.gz snapshot of ~/.hermes/skills/ "
|
||||
"(curator also does this automatically before every real run)",
|
||||
)
|
||||
p_backup.add_argument(
|
||||
"--reason", default=None,
|
||||
help="Free-text label stored in manifest.json (default: 'manual')",
|
||||
)
|
||||
p_backup.set_defaults(func=_cmd_backup)
|
||||
|
||||
p_rollback = subs.add_parser(
|
||||
"rollback",
|
||||
help="Restore ~/.hermes/skills/ from a curator snapshot "
|
||||
"(defaults to the newest)",
|
||||
)
|
||||
p_rollback.add_argument(
|
||||
"--list", action="store_true",
|
||||
help="List available snapshots and exit without restoring",
|
||||
)
|
||||
p_rollback.add_argument(
|
||||
"--id", dest="backup_id", default=None,
|
||||
help="Snapshot id to restore (see `--list`); default: newest",
|
||||
)
|
||||
p_rollback.add_argument(
|
||||
"-y", "--yes", action="store_true",
|
||||
help="Skip confirmation prompt",
|
||||
)
|
||||
p_rollback.set_defaults(func=_cmd_rollback)
|
||||
|
||||
|
||||
def cli_main(argv=None) -> int:
|
||||
"""Standalone entry (also usable by hermes_cli.main fallthrough)."""
|
||||
|
||||
@@ -156,6 +156,8 @@ def curses_checklist(
|
||||
flush_stdin()
|
||||
return result_holder[0] if result_holder[0] is not None else cancel_returns
|
||||
|
||||
except KeyboardInterrupt:
|
||||
return cancel_returns
|
||||
except Exception:
|
||||
return _numbered_fallback(title, items, selected, cancel_returns, status_fn)
|
||||
|
||||
@@ -278,6 +280,8 @@ def curses_radiolist(
|
||||
flush_stdin()
|
||||
return result_holder[0] if result_holder[0] is not None else cancel_returns
|
||||
|
||||
except KeyboardInterrupt:
|
||||
return cancel_returns
|
||||
except Exception:
|
||||
return _radio_numbered_fallback(title, items, selected, cancel_returns)
|
||||
|
||||
@@ -401,6 +405,8 @@ def curses_single_select(
|
||||
return None
|
||||
return result_holder[0]
|
||||
|
||||
except KeyboardInterrupt:
|
||||
return None
|
||||
except Exception:
|
||||
all_items = list(items) + [cancel_label]
|
||||
cancel_idx = len(items)
|
||||
|
||||
+82
-7
@@ -1,12 +1,19 @@
|
||||
"""``hermes debug`` — debug tools for Hermes Agent.
|
||||
"""``hermes debug`` debug tools for Hermes Agent.
|
||||
|
||||
Currently supports:
|
||||
hermes debug share Upload debug report (system info + logs) to a
|
||||
paste service and print a shareable URL.
|
||||
By default, log content is run through
|
||||
``agent.redact.redact_sensitive_text`` with
|
||||
``force=True`` before upload so credentials in
|
||||
``~/.hermes/logs/*.log`` are not leaked into
|
||||
the public paste service. Pass ``--no-redact``
|
||||
to disable.
|
||||
"""
|
||||
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
@@ -19,6 +26,16 @@ from typing import Optional
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Banner prepended to upload-bound log content when redaction is enabled.
|
||||
# Visible in the public paste so reviewers know the content was sanitized.
|
||||
# Kept short; the trailing newline guarantees the banner sits on its own line.
|
||||
_REDACTION_BANNER = (
|
||||
"[hermes debug share: log content redacted at upload time. "
|
||||
"run with --no-redact to disable]\n"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Paste services — try paste.rs first, dpaste.com as fallback.
|
||||
@@ -368,17 +385,40 @@ def _resolve_log_path(log_name: str) -> Optional[Path]:
|
||||
return None
|
||||
|
||||
|
||||
def _redact_log_text(text: str) -> str:
|
||||
"""Run ``redact_sensitive_text`` with ``force=True`` over upload-bound text.
|
||||
|
||||
Uses ``force=True`` so redaction fires regardless of the operator's
|
||||
``security.redact_secrets`` setting. The local on-disk log file is
|
||||
not modified; only the in-memory copy headed for the public paste
|
||||
service is sanitized. Returns the redacted text (or the original
|
||||
when empty / non-string).
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
return redact_sensitive_text(text, force=True)
|
||||
|
||||
|
||||
def _capture_log_snapshot(
|
||||
log_name: str,
|
||||
*,
|
||||
tail_lines: int,
|
||||
max_bytes: int = _MAX_LOG_BYTES,
|
||||
redact: bool = True,
|
||||
) -> LogSnapshot:
|
||||
"""Capture a log once and derive summary/full-log views from it.
|
||||
|
||||
The report tail and standalone log upload must come from the same file
|
||||
snapshot. Otherwise a rotation/truncate between reads can make the report
|
||||
look newer than the uploaded ``agent.log`` paste.
|
||||
|
||||
When ``redact`` is True (the default), both ``tail_text`` and
|
||||
``full_text`` are run through ``_redact_log_text`` so the snapshot
|
||||
returned is upload-safe. The on-disk log file is never modified.
|
||||
Pass ``redact=False`` to capture original log content (used by
|
||||
``hermes debug share --no-redact``).
|
||||
"""
|
||||
log_path = _resolve_log_path(log_name)
|
||||
if log_path is None:
|
||||
@@ -438,18 +478,34 @@ def _capture_log_snapshot(
|
||||
if truncated:
|
||||
full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
|
||||
|
||||
if redact:
|
||||
tail_text = _redact_log_text(tail_text)
|
||||
full_text = _redact_log_text(full_text)
|
||||
|
||||
return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
|
||||
except Exception as exc:
|
||||
return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
|
||||
|
||||
|
||||
def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
|
||||
"""Capture all logs used by debug-share exactly once."""
|
||||
def _capture_default_log_snapshots(
|
||||
log_lines: int, *, redact: bool = True
|
||||
) -> dict[str, LogSnapshot]:
|
||||
"""Capture all logs used by debug-share exactly once.
|
||||
|
||||
``redact`` is forwarded to each ``_capture_log_snapshot`` call so all
|
||||
captured logs share the same redaction policy for a given run.
|
||||
"""
|
||||
errors_lines = min(log_lines, 100)
|
||||
return {
|
||||
"agent": _capture_log_snapshot("agent", tail_lines=log_lines),
|
||||
"errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
|
||||
"gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
|
||||
"agent": _capture_log_snapshot(
|
||||
"agent", tail_lines=log_lines, redact=redact
|
||||
),
|
||||
"errors": _capture_log_snapshot(
|
||||
"errors", tail_lines=errors_lines, redact=redact
|
||||
),
|
||||
"gateway": _capture_log_snapshot(
|
||||
"gateway", tail_lines=errors_lines, redact=redact
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -532,6 +588,7 @@ def run_debug_share(args):
|
||||
log_lines = getattr(args, "lines", 200)
|
||||
expiry = getattr(args, "expire", 7)
|
||||
local_only = getattr(args, "local", False)
|
||||
redact = not getattr(args, "no_redact", False)
|
||||
|
||||
if not local_only:
|
||||
print(_PRIVACY_NOTICE)
|
||||
@@ -539,8 +596,16 @@ def run_debug_share(args):
|
||||
print("Collecting debug report...")
|
||||
|
||||
# Capture dump once — prepended to every paste for context.
|
||||
# The dump is already redacted at extract time via dump.py:_redact;
|
||||
# log_snapshots are redacted by _capture_default_log_snapshots when
|
||||
# redact=True so credentials never reach the public paste service.
|
||||
dump_text = _capture_dump()
|
||||
log_snapshots = _capture_default_log_snapshots(log_lines)
|
||||
log_snapshots = _capture_default_log_snapshots(log_lines, redact=redact)
|
||||
|
||||
if redact:
|
||||
logger.info(
|
||||
"hermes debug share: applied force-mode redaction to log snapshots before upload"
|
||||
)
|
||||
|
||||
report = collect_debug_report(
|
||||
log_lines=log_lines,
|
||||
@@ -556,6 +621,15 @@ def run_debug_share(args):
|
||||
if gateway_log:
|
||||
gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
|
||||
|
||||
# Visible banner so reviewers reading the public paste know redaction
|
||||
# was applied at upload time. Banner is omitted under --no-redact.
|
||||
if redact:
|
||||
report = _REDACTION_BANNER + report
|
||||
if agent_log:
|
||||
agent_log = _REDACTION_BANNER + agent_log
|
||||
if gateway_log:
|
||||
gateway_log = _REDACTION_BANNER + gateway_log
|
||||
|
||||
if local_only:
|
||||
print(report)
|
||||
if agent_log:
|
||||
@@ -666,6 +740,7 @@ def run_debug(args):
|
||||
print(" --lines N Number of log lines to include (default: 200)")
|
||||
print(" --expire N Paste expiry in days (default: 7)")
|
||||
print(" --local Print report locally instead of uploading")
|
||||
print(" --no-redact Disable upload-time secret redaction (default: redact)")
|
||||
print()
|
||||
print("Options (delete):")
|
||||
print(" <url> ... One or more paste URLs to delete")
|
||||
|
||||
+132
-35
@@ -12,6 +12,7 @@ import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from hermes_constants import display_hermes_home
|
||||
|
||||
PROJECT_ROOT = get_project_root()
|
||||
@@ -19,15 +20,8 @@ HERMES_HOME = get_hermes_home()
|
||||
_DHH = display_hermes_home() # user-facing display path (e.g. ~/.hermes or ~/.hermes/profiles/coder)
|
||||
|
||||
# Load environment variables from ~/.hermes/.env so API key checks work
|
||||
from dotenv import load_dotenv
|
||||
_env_path = get_env_path()
|
||||
if _env_path.exists():
|
||||
try:
|
||||
load_dotenv(_env_path, encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(_env_path, encoding="latin-1")
|
||||
# Also try project .env as dev fallback
|
||||
load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
|
||||
load_hermes_dotenv(hermes_home=_env_path.parent, project_env=PROJECT_ROOT / ".env")
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.models import _HERMES_USER_AGENT
|
||||
@@ -113,15 +107,35 @@ def _honcho_is_configured_for_doctor() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_kanban_worker_env_gate(item: dict) -> bool:
|
||||
"""Return True when Kanban is unavailable only because this is not a worker process."""
|
||||
if item.get("name") != "kanban":
|
||||
return False
|
||||
if os.environ.get("HERMES_KANBAN_TASK"):
|
||||
return False
|
||||
|
||||
tools = item.get("tools") or []
|
||||
return bool(tools) and all(str(tool).startswith("kanban_") for tool in tools)
|
||||
|
||||
|
||||
def _doctor_tool_availability_detail(toolset: str) -> str:
|
||||
"""Optional explanatory suffix for toolsets whose doctor status needs context."""
|
||||
if toolset == "kanban" and not os.environ.get("HERMES_KANBAN_TASK"):
|
||||
return "(runtime-gated; loaded only for dispatcher-spawned workers)"
|
||||
return ""
|
||||
|
||||
|
||||
def _apply_doctor_tool_availability_overrides(available: list[str], unavailable: list[dict]) -> tuple[list[str], list[dict]]:
|
||||
"""Adjust runtime-gated tool availability for doctor diagnostics."""
|
||||
if not _honcho_is_configured_for_doctor():
|
||||
return available, unavailable
|
||||
|
||||
updated_available = list(available)
|
||||
updated_unavailable = []
|
||||
for item in unavailable:
|
||||
if item.get("name") == "honcho":
|
||||
name = item.get("name")
|
||||
if _is_kanban_worker_env_gate(item):
|
||||
if "kanban" not in updated_available:
|
||||
updated_available.append("kanban")
|
||||
continue
|
||||
if name == "honcho" and _honcho_is_configured_for_doctor():
|
||||
if "honcho" not in updated_available:
|
||||
updated_available.append("honcho")
|
||||
continue
|
||||
@@ -175,6 +189,85 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
|
||||
check_warn("Could not verify systemd linger", f"({linger_detail})")
|
||||
|
||||
|
||||
_APIKEY_PROVIDERS_CACHE: list | None = None
|
||||
|
||||
|
||||
def _build_apikey_providers_list() -> list:
|
||||
"""Build the API-key provider health-check list once and cache it.
|
||||
|
||||
Tuple format: (name, env_vars, default_url, base_env, supports_models_endpoint)
|
||||
Base list augmented with any ProviderProfile with auth_type="api_key" not
|
||||
already present — adding plugins/model-providers/<name>/ is sufficient to get into doctor.
|
||||
"""
|
||||
_static = [
|
||||
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
|
||||
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
|
||||
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
||||
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
||||
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
||||
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
|
||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
||||
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
|
||||
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
|
||||
# MiniMax global: /v1 endpoint supports /models.
|
||||
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
|
||||
# MiniMax CN: /v1 endpoint does NOT support /models (returns 404).
|
||||
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", False),
|
||||
("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
|
||||
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
|
||||
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
|
||||
# OpenCode Go has no shared /models endpoint; skip the health check.
|
||||
("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
|
||||
]
|
||||
_known_names = {t[0] for t in _static}
|
||||
# Also index by profile canonical name so profiles without display_name
|
||||
# don't create duplicate entries for providers already in the static list.
|
||||
_known_canonical: set[str] = set()
|
||||
_name_to_canonical = {
|
||||
"Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding",
|
||||
"StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn",
|
||||
"Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek",
|
||||
"Hugging Face": "huggingface", "NVIDIA NIM": "nvidia",
|
||||
"Alibaba/DashScope": "alibaba", "MiniMax": "minimax",
|
||||
"MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway",
|
||||
"Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen",
|
||||
"OpenCode Go": "opencode-go",
|
||||
}
|
||||
for _label, _canonical in _name_to_canonical.items():
|
||||
_known_canonical.add(_canonical)
|
||||
try:
|
||||
from providers import list_providers
|
||||
from providers.base import ProviderProfile as _PP
|
||||
for _pp in list_providers():
|
||||
if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars:
|
||||
continue
|
||||
_label = _pp.display_name or _pp.name
|
||||
if _label in _known_names or _pp.name in _known_canonical:
|
||||
continue
|
||||
# Separate API-key vars from base-URL override vars — the health-check
|
||||
# loop sends the first found value as Authorization: Bearer, so a URL
|
||||
# string must never be picked.
|
||||
_key_vars = tuple(
|
||||
v for v in _pp.env_vars
|
||||
if not v.endswith("_BASE_URL") and not v.endswith("_URL")
|
||||
)
|
||||
_base_var = next(
|
||||
(v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")),
|
||||
None,
|
||||
)
|
||||
if not _key_vars:
|
||||
continue
|
||||
_models_url = (
|
||||
(_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
|
||||
if _pp.base_url else None
|
||||
)
|
||||
_static.append((_label, _key_vars, _models_url, _base_var, True))
|
||||
except Exception:
|
||||
pass
|
||||
return _static
|
||||
|
||||
|
||||
def run_doctor(args):
|
||||
"""Run diagnostic checks."""
|
||||
should_fix = getattr(args, 'fix', False)
|
||||
@@ -263,8 +356,11 @@ def run_doctor(args):
|
||||
if env_path.exists():
|
||||
check_ok(f"{_DHH}/.env file exists")
|
||||
|
||||
# Check for common issues
|
||||
content = env_path.read_text()
|
||||
# Check for common issues. Pin encoding to UTF-8 because .env files are
|
||||
# written as UTF-8 everywhere in the codebase, while Path.read_text()
|
||||
# defaults to the system locale — which crashes on non-UTF-8 Windows
|
||||
# locales (e.g. GBK) as soon as the file contains any non-ASCII byte.
|
||||
content = env_path.read_text(encoding="utf-8")
|
||||
if _has_provider_env_config(content):
|
||||
check_ok("API key or custom endpoint configured")
|
||||
else:
|
||||
@@ -932,6 +1028,8 @@ def run_doctor(args):
|
||||
agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
|
||||
if agent_browser_path.exists():
|
||||
check_ok("agent-browser (Node.js)", "(browser automation)")
|
||||
elif shutil.which("agent-browser"):
|
||||
check_ok("agent-browser", "(browser automation)")
|
||||
else:
|
||||
if _is_termux():
|
||||
check_info("agent-browser is not installed (expected in the tested Termux path)")
|
||||
@@ -1082,26 +1180,11 @@ def run_doctor(args):
|
||||
# -- API-key providers --
|
||||
# Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
|
||||
# If supports_models_endpoint is False, we skip the health check and just show "configured"
|
||||
_apikey_providers = [
|
||||
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
|
||||
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
|
||||
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
||||
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
||||
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
||||
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
|
||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
||||
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
|
||||
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
|
||||
# MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
|
||||
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
|
||||
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True),
|
||||
("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
|
||||
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
|
||||
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
|
||||
# OpenCode Go has no shared /models endpoint; skip the health check.
|
||||
("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
|
||||
]
|
||||
# Cached at module level after first build — profiles auto-extend it.
|
||||
global _APIKEY_PROVIDERS_CACHE
|
||||
if _APIKEY_PROVIDERS_CACHE is None:
|
||||
_APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
|
||||
_apikey_providers = _APIKEY_PROVIDERS_CACHE
|
||||
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
|
||||
_key = ""
|
||||
for _ev in _env_vars:
|
||||
@@ -1215,7 +1298,7 @@ def run_doctor(args):
|
||||
|
||||
for tid in available:
|
||||
info = TOOLSET_REQUIREMENTS.get(tid, {})
|
||||
check_ok(info.get("name", tid))
|
||||
check_ok(info.get("name", tid), _doctor_tool_availability_detail(tid))
|
||||
|
||||
for item in unavailable:
|
||||
env_vars = item.get("missing_vars") or item.get("env_vars") or []
|
||||
@@ -1258,9 +1341,23 @@ def run_doctor(args):
|
||||
check_warn("Skills Hub directory not initialized", "(run: hermes skills list)")
|
||||
|
||||
from hermes_cli.config import get_env_value
|
||||
|
||||
def _gh_authenticated() -> bool:
|
||||
"""Check if gh CLI is authenticated via token file or device flow."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["gh", "auth", "status", "--json", "authenticated"],
|
||||
capture_output=True, timeout=10,
|
||||
)
|
||||
return result.returncode == 0
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
return False
|
||||
|
||||
github_token = get_env_value("GITHUB_TOKEN") or get_env_value("GH_TOKEN")
|
||||
if github_token:
|
||||
check_ok("GitHub token configured (authenticated API access)")
|
||||
elif _gh_authenticated():
|
||||
check_ok("GitHub authenticated via gh CLI", "(full API access — no GITHUB_TOKEN needed)")
|
||||
else:
|
||||
check_warn("No GITHUB_TOKEN", f"(60 req/hr rate limit — set in {_DHH}/.env for better rates)")
|
||||
|
||||
|
||||
+5
-8
@@ -14,6 +14,7 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from hermes_constants import display_hermes_home
|
||||
|
||||
|
||||
@@ -195,15 +196,11 @@ def run_dump(args):
|
||||
show_keys = getattr(args, "show_keys", False)
|
||||
|
||||
# Load env from .env file so key checks work
|
||||
from dotenv import load_dotenv
|
||||
env_path = get_env_path()
|
||||
if env_path.exists():
|
||||
try:
|
||||
load_dotenv(env_path, encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(env_path, encoding="latin-1")
|
||||
# Also try project .env as dev fallback
|
||||
load_dotenv(get_project_root() / ".env", override=False, encoding="utf-8")
|
||||
load_hermes_dotenv(
|
||||
hermes_home=env_path.parent,
|
||||
project_env=get_project_root() / ".env",
|
||||
)
|
||||
|
||||
project_root = get_project_root()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
+241
-12
@@ -10,6 +10,7 @@ import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
@@ -59,6 +60,13 @@ class GatewayRuntimeSnapshot:
|
||||
def has_process_service_mismatch(self) -> bool:
|
||||
return self.service_installed and self.running and not self.service_running
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProfileGatewayProcess:
|
||||
profile: str
|
||||
path: Path
|
||||
pid: int
|
||||
|
||||
def _get_service_pids() -> set:
|
||||
"""Return PIDs currently managed by systemd or launchd gateway services.
|
||||
|
||||
@@ -180,7 +188,7 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
|
||||
|
||||
SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
|
||||
which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
|
||||
seconds), then exits with code 75. Both systemd (``Restart=on-failure``
|
||||
seconds), then exits with code 75. Both systemd (``Restart=always``
|
||||
+ ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
|
||||
= false``) relaunch the process after the graceful exit.
|
||||
|
||||
@@ -229,6 +237,26 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _get_ancestor_pids() -> set[int]:
|
||||
"""Return the set of PIDs in the current process's ancestor chain.
|
||||
|
||||
Walks from the current PID up to PID 1 (init) so that process-table scans
|
||||
never match the calling CLI process or any of its parents. This prevents
|
||||
``hermes gateway status`` from falsely counting the ``hermes`` CLI that
|
||||
invoked it as a running gateway instance (see #13242).
|
||||
"""
|
||||
ancestors: set[int] = set()
|
||||
pid = os.getpid()
|
||||
# Cap iterations to avoid infinite loops on exotic platforms.
|
||||
for _ in range(64):
|
||||
ancestors.add(pid)
|
||||
parent = _get_parent_pid(pid)
|
||||
if parent is None or parent <= 0 or parent in ancestors:
|
||||
break
|
||||
pid = parent
|
||||
return ancestors
|
||||
|
||||
|
||||
def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
|
||||
if pid is None or pid <= 0:
|
||||
return
|
||||
@@ -244,6 +272,10 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
|
||||
a live gateway when the PID file is stale/missing, and ``--all`` sweeps can
|
||||
discover gateways outside the current profile.
|
||||
"""
|
||||
# Exclude the entire ancestor chain so the CLI process that invoked this
|
||||
# scan (e.g. ``hermes gateway status``) is never mistaken for a running
|
||||
# gateway. See #13242.
|
||||
exclude_pids = exclude_pids | _get_ancestor_pids()
|
||||
pids: list[int] = []
|
||||
patterns = [
|
||||
"hermes_cli.main gateway",
|
||||
@@ -371,6 +403,83 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
|
||||
return pids
|
||||
|
||||
|
||||
def find_profile_gateway_processes(
|
||||
exclude_pids: set | None = None,
|
||||
) -> list[ProfileGatewayProcess]:
|
||||
"""Return running gateway PIDs mapped to Hermes profiles via PID files."""
|
||||
_exclude = set(exclude_pids or set())
|
||||
processes: list[ProfileGatewayProcess] = []
|
||||
try:
|
||||
from gateway.status import get_running_pid
|
||||
from hermes_cli.profiles import list_profiles
|
||||
except Exception:
|
||||
return processes
|
||||
|
||||
seen: set[int] = set()
|
||||
for profile in list_profiles():
|
||||
try:
|
||||
pid = get_running_pid(profile.path / "gateway.pid", cleanup_stale=False)
|
||||
except Exception:
|
||||
continue
|
||||
if pid is None or pid <= 0 or pid in _exclude or pid in seen:
|
||||
continue
|
||||
seen.add(pid)
|
||||
processes.append(ProfileGatewayProcess(profile=profile.name, path=profile.path, pid=pid))
|
||||
return processes
|
||||
|
||||
|
||||
def _gateway_run_args_for_profile(profile: str) -> list[str]:
|
||||
args = [get_python_path(), "-m", "hermes_cli.main"]
|
||||
if profile != "default":
|
||||
args.extend(["--profile", profile])
|
||||
args.extend(["gateway", "run", "--replace"])
|
||||
return args
|
||||
|
||||
|
||||
def launch_detached_profile_gateway_restart(profile: str, old_pid: int) -> bool:
|
||||
"""Relaunch a manually-run profile gateway after its current PID exits."""
|
||||
if old_pid <= 0:
|
||||
return False
|
||||
|
||||
watcher = textwrap.dedent(
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
pid = int(sys.argv[1])
|
||||
cmd = sys.argv[2:]
|
||||
deadline = time.monotonic() + 120
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except ProcessLookupError:
|
||||
break
|
||||
except PermissionError:
|
||||
pass
|
||||
time.sleep(0.2)
|
||||
subprocess.Popen(
|
||||
cmd,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
start_new_session=True,
|
||||
)
|
||||
"""
|
||||
).strip()
|
||||
|
||||
try:
|
||||
subprocess.Popen(
|
||||
[sys.executable, "-c", watcher, str(old_pid), *_gateway_run_args_for_profile(profile)],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
start_new_session=True,
|
||||
)
|
||||
except OSError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
|
||||
selected_system = _select_systemd_scope(system)
|
||||
unit_exists = get_systemd_unit_path(system=selected_system).exists()
|
||||
@@ -605,6 +714,32 @@ def _print_gateway_process_mismatch(snapshot: GatewayRuntimeSnapshot) -> None:
|
||||
print(" can refuse to start another copy until this process stops.")
|
||||
|
||||
|
||||
def _print_other_profiles_gateway_status() -> None:
|
||||
"""Print a summary of gateway status across all profiles.
|
||||
|
||||
Shown at the bottom of ``hermes gateway status`` output so users with
|
||||
multiple profiles can tell at a glance which gateways are running and
|
||||
avoid confusing another profile's process with the current one.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.profiles import get_active_profile_name
|
||||
|
||||
current = get_active_profile_name()
|
||||
other_processes = [
|
||||
p for p in find_profile_gateway_processes()
|
||||
if p.profile != current
|
||||
]
|
||||
if not other_processes:
|
||||
return
|
||||
|
||||
print()
|
||||
print("Other profiles:")
|
||||
for proc in other_processes:
|
||||
print(f" ✓ {proc.profile:<16s} — PID {proc.pid}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None,
|
||||
all_profiles: bool = False) -> int:
|
||||
"""Kill any running gateway processes. Returns count killed.
|
||||
@@ -650,6 +785,12 @@ def stop_profile_gateway() -> bool:
|
||||
if pid is None:
|
||||
return False
|
||||
|
||||
try:
|
||||
from gateway.status import write_planned_stop_marker
|
||||
write_planned_stop_marker(pid)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
except ProcessLookupError:
|
||||
@@ -1473,6 +1614,46 @@ def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
|
||||
return [p for p in candidates if p not in path_entries and Path(p).exists()]
|
||||
|
||||
|
||||
def _build_wsl_interop_paths(path_entries: list[str]) -> list[str]:
|
||||
"""Return WSL Windows interop PATH entries for generated systemd units.
|
||||
|
||||
WSL shells normally inherit Windows PATH entries such as
|
||||
``/mnt/c/WINDOWS/System32``. systemd user services do not, so gateway tools
|
||||
that call ``powershell.exe``/``cmd.exe`` work in a terminal but fail in the
|
||||
background service unless we persist the relevant entries at install time.
|
||||
"""
|
||||
if not is_wsl():
|
||||
return []
|
||||
|
||||
candidates: list[str] = []
|
||||
for entry in os.environ.get("PATH", "").split(os.pathsep):
|
||||
if entry.startswith("/mnt/"):
|
||||
candidates.append(entry)
|
||||
|
||||
for executable in ("powershell.exe", "cmd.exe", "explorer.exe", "wsl.exe"):
|
||||
resolved = shutil.which(executable)
|
||||
if resolved:
|
||||
candidates.append(str(Path(resolved).parent))
|
||||
|
||||
for entry in (
|
||||
"/mnt/c/WINDOWS/system32",
|
||||
"/mnt/c/WINDOWS",
|
||||
"/mnt/c/WINDOWS/System32/Wbem",
|
||||
"/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/",
|
||||
"/mnt/c/WINDOWS/System32/OpenSSH/",
|
||||
):
|
||||
if Path(entry).exists():
|
||||
candidates.append(entry)
|
||||
|
||||
result: list[str] = []
|
||||
seen = set(path_entries)
|
||||
for entry in candidates:
|
||||
if entry and entry not in seen:
|
||||
seen.add(entry)
|
||||
result.append(entry)
|
||||
return result
|
||||
|
||||
|
||||
def _remap_path_for_user(path: str, target_home_dir: str) -> str:
|
||||
"""Remap *path* from the current user's home to *target_home_dir*.
|
||||
|
||||
@@ -1564,14 +1745,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
||||
node_bin = _remap_path_for_user(node_bin, home_dir)
|
||||
path_entries = [_remap_path_for_user(p, home_dir) for p in path_entries]
|
||||
path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
|
||||
path_entries.extend(_build_wsl_interop_paths(path_entries))
|
||||
path_entries.extend(common_bin_paths)
|
||||
sane_path = ":".join(path_entries)
|
||||
return f"""[Unit]
|
||||
Description={SERVICE_DESCRIPTION}
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
StartLimitIntervalSec=600
|
||||
StartLimitBurst=5
|
||||
StartLimitIntervalSec=0
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
@@ -1585,8 +1766,10 @@ Environment="LOGNAME={username}"
|
||||
Environment="PATH={sane_path}"
|
||||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=30
|
||||
Restart=always
|
||||
RestartSec=60
|
||||
RestartMaxDelaySec=300
|
||||
RestartSteps=5
|
||||
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
@@ -1602,13 +1785,14 @@ WantedBy=multi-user.target
|
||||
hermes_home = str(get_hermes_home().resolve())
|
||||
profile_arg = _profile_arg(hermes_home)
|
||||
path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
|
||||
path_entries.extend(_build_wsl_interop_paths(path_entries))
|
||||
path_entries.extend(common_bin_paths)
|
||||
sane_path = ":".join(path_entries)
|
||||
return f"""[Unit]
|
||||
Description={SERVICE_DESCRIPTION}
|
||||
After=network.target
|
||||
StartLimitIntervalSec=600
|
||||
StartLimitBurst=5
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
StartLimitIntervalSec=0
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
@@ -1617,8 +1801,10 @@ WorkingDirectory={working_dir}
|
||||
Environment="PATH={sane_path}"
|
||||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=30
|
||||
Restart=always
|
||||
RestartSec=60
|
||||
RestartMaxDelaySec=300
|
||||
RestartSteps=5
|
||||
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
@@ -1833,6 +2019,15 @@ def systemd_uninstall(system: bool = False):
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled")
|
||||
|
||||
|
||||
def _require_service_installed(action: str, system: bool = False) -> None:
|
||||
unit_path = get_systemd_unit_path(system=system)
|
||||
if not unit_path.exists():
|
||||
scope_flag = " --system" if system else ""
|
||||
print(f"✗ Gateway service is not installed")
|
||||
print(f" Run: {'sudo ' if system else ''}hermes gateway install{scope_flag}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def systemd_start(system: bool = False):
|
||||
system = _select_systemd_scope(system)
|
||||
if system:
|
||||
@@ -1842,6 +2037,7 @@ def systemd_start(system: bool = False):
|
||||
# reachable (common on fresh RHEL/Debian SSH sessions without linger).
|
||||
# Raises UserSystemdUnavailableError with a remediation message.
|
||||
_preflight_user_systemd()
|
||||
_require_service_installed("start", system=system)
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
_run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service started")
|
||||
@@ -1852,6 +2048,14 @@ def systemd_stop(system: bool = False):
|
||||
system = _select_systemd_scope(system)
|
||||
if system:
|
||||
_require_root_for_system_service("stop")
|
||||
_require_service_installed("stop", system=system)
|
||||
try:
|
||||
from gateway.status import get_running_pid, write_planned_stop_marker
|
||||
pid = get_running_pid(cleanup_stale=False)
|
||||
if pid is not None:
|
||||
write_planned_stop_marker(pid)
|
||||
except Exception:
|
||||
pass
|
||||
_run_systemctl(["stop", get_service_name()], system=system, check=True, timeout=90)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service stopped")
|
||||
|
||||
@@ -1863,6 +2067,7 @@ def systemd_restart(system: bool = False):
|
||||
_require_root_for_system_service("restart")
|
||||
else:
|
||||
_preflight_user_systemd()
|
||||
_require_service_installed("restart", system=system)
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
from gateway.status import get_running_pid
|
||||
|
||||
@@ -2212,6 +2417,13 @@ def launchd_start():
|
||||
def launchd_stop():
|
||||
label = get_launchd_label()
|
||||
target = f"{_launchd_domain()}/{label}"
|
||||
try:
|
||||
from gateway.status import get_running_pid, write_planned_stop_marker
|
||||
pid = get_running_pid(cleanup_stale=False)
|
||||
if pid is not None:
|
||||
write_planned_stop_marker(pid)
|
||||
except Exception:
|
||||
pass
|
||||
# bootout unloads the service definition so KeepAlive doesn't respawn
|
||||
# the process. A plain `kill SIGTERM` only signals the process — launchd
|
||||
# immediately restarts it because KeepAlive.SuccessfulExit = false.
|
||||
@@ -2354,6 +2566,20 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
|
||||
hasn't fully exited yet.
|
||||
"""
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
# Refresh the systemd unit definition on every boot so that restart
|
||||
# settings (RestartSec, StartLimitIntervalSec, etc.) stay current even
|
||||
# when the process was respawned via exit-code-75 (stale-code or
|
||||
# /restart) rather than through `hermes gateway restart` which already
|
||||
# calls refresh_systemd_unit_if_needed(). Without this, a code update
|
||||
# that ships new unit settings won't take effect until the next manual
|
||||
# `hermes gateway start/restart` — leaving the gateway vulnerable to
|
||||
# the exact failure mode the new settings were meant to prevent.
|
||||
if supports_systemd_services():
|
||||
try:
|
||||
refresh_systemd_unit_if_needed(system=False)
|
||||
except Exception:
|
||||
pass # best-effort; don't block gateway startup
|
||||
|
||||
from gateway.run import start_gateway
|
||||
|
||||
@@ -2366,7 +2592,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
|
||||
print()
|
||||
|
||||
# Exit with code 1 if gateway fails to connect any platform,
|
||||
# so systemd Restart=on-failure will retry on transient errors
|
||||
# so systemd Restart=always will retry on transient errors
|
||||
verbosity = None if quiet else verbose
|
||||
try:
|
||||
success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
|
||||
@@ -4368,6 +4594,9 @@ def _gateway_command_inner(args):
|
||||
print(" hermes gateway install # Install as user service")
|
||||
print(" sudo hermes gateway install --system # Install as boot-time system service")
|
||||
|
||||
# Show other profiles' gateway status for multi-profile awareness
|
||||
_print_other_profiles_gateway_status()
|
||||
|
||||
elif subcmd == "migrate-legacy":
|
||||
# Stop, disable, and remove legacy Hermes gateway unit files from
|
||||
# pre-rename installs (e.g. hermes.service). Profile units and
|
||||
@@ -4377,4 +4606,4 @@ def _gateway_command_inner(args):
|
||||
if not supports_systemd_services() and not is_macos():
|
||||
print("Legacy unit migration only applies to systemd-based Linux hosts.")
|
||||
return
|
||||
remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
|
||||
remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
|
||||
|
||||
@@ -0,0 +1,535 @@
|
||||
"""Persistent session goals — the Ralph loop for Hermes.
|
||||
|
||||
A goal is a free-form user objective that stays active across turns. After
|
||||
each turn completes, a small judge call asks an auxiliary model "is this
|
||||
goal satisfied by the assistant's last response?". If not, Hermes feeds a
|
||||
continuation prompt back into the same session and keeps working until the
|
||||
goal is done, turn budget is exhausted, the user pauses/clears it, or the
|
||||
user sends a new message (which takes priority and pauses the goal loop).
|
||||
|
||||
State is persisted in SessionDB's ``state_meta`` table keyed by
|
||||
``goal:<session_id>`` so ``/resume`` picks it up.
|
||||
|
||||
Design notes / invariants:
|
||||
|
||||
- The continuation prompt is just a normal user message appended to the
|
||||
session via ``run_conversation``. No system-prompt mutation, no toolset
|
||||
swap — prompt caching stays intact.
|
||||
- Judge failures are fail-OPEN: ``continue``. A broken judge must not wedge
|
||||
progress; the turn budget is the backstop.
|
||||
- When a real user message arrives mid-loop it preempts the continuation
|
||||
prompt and also pauses the goal loop for that turn (we still re-judge
|
||||
after, so if the user's message happens to complete the goal the judge
|
||||
will say ``done``).
|
||||
- This module has zero hard dependency on ``cli.HermesCLI`` or the gateway
|
||||
runner — both wire the same ``GoalManager`` in.
|
||||
|
||||
Nothing in this module touches the agent's system prompt or toolset.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Constants & defaults
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
DEFAULT_MAX_TURNS = 20
|
||||
DEFAULT_JUDGE_TIMEOUT = 30.0
|
||||
# Cap how much of the last response + recent messages we send to the judge.
|
||||
_JUDGE_RESPONSE_SNIPPET_CHARS = 4000
|
||||
|
||||
|
||||
CONTINUATION_PROMPT_TEMPLATE = (
|
||||
"[Continuing toward your standing goal]\n"
|
||||
"Goal: {goal}\n\n"
|
||||
"Continue working toward this goal. Take the next concrete step. "
|
||||
"If you believe the goal is complete, state so explicitly and stop. "
|
||||
"If you are blocked and need input from the user, say so clearly and stop."
|
||||
)
|
||||
|
||||
|
||||
JUDGE_SYSTEM_PROMPT = (
|
||||
"You are a strict judge evaluating whether an autonomous agent has "
|
||||
"achieved a user's stated goal. You receive the goal text and the "
|
||||
"agent's most recent response. Your only job is to decide whether "
|
||||
"the goal is fully satisfied based on that response.\n\n"
|
||||
"A goal is DONE only when:\n"
|
||||
"- The response explicitly confirms the goal was completed, OR\n"
|
||||
"- The response clearly shows the final deliverable was produced, OR\n"
|
||||
"- The response explains the goal is unachievable / blocked / needs "
|
||||
"user input (treat this as DONE with reason describing the block).\n\n"
|
||||
"Otherwise the goal is NOT done — CONTINUE.\n\n"
|
||||
"Reply ONLY with a single JSON object on one line:\n"
|
||||
'{\"done\": <true|false>, \"reason\": \"<one-sentence rationale>\"}'
|
||||
)
|
||||
|
||||
|
||||
JUDGE_USER_PROMPT_TEMPLATE = (
|
||||
"Goal:\n{goal}\n\n"
|
||||
"Agent's most recent response:\n{response}\n\n"
|
||||
"Is the goal satisfied?"
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Dataclass
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@dataclass
|
||||
class GoalState:
|
||||
"""Serializable goal state stored per session."""
|
||||
|
||||
goal: str
|
||||
status: str = "active" # active | paused | done | cleared
|
||||
turns_used: int = 0
|
||||
max_turns: int = DEFAULT_MAX_TURNS
|
||||
created_at: float = 0.0
|
||||
last_turn_at: float = 0.0
|
||||
last_verdict: Optional[str] = None # "done" | "continue" | "skipped"
|
||||
last_reason: Optional[str] = None
|
||||
paused_reason: Optional[str] = None # why we auto-paused (budget, etc.)
|
||||
|
||||
def to_json(self) -> str:
|
||||
return json.dumps(asdict(self), ensure_ascii=False)
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, raw: str) -> "GoalState":
|
||||
data = json.loads(raw)
|
||||
return cls(
|
||||
goal=data.get("goal", ""),
|
||||
status=data.get("status", "active"),
|
||||
turns_used=int(data.get("turns_used", 0) or 0),
|
||||
max_turns=int(data.get("max_turns", DEFAULT_MAX_TURNS) or DEFAULT_MAX_TURNS),
|
||||
created_at=float(data.get("created_at", 0.0) or 0.0),
|
||||
last_turn_at=float(data.get("last_turn_at", 0.0) or 0.0),
|
||||
last_verdict=data.get("last_verdict"),
|
||||
last_reason=data.get("last_reason"),
|
||||
paused_reason=data.get("paused_reason"),
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Persistence (SessionDB state_meta)
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _meta_key(session_id: str) -> str:
|
||||
return f"goal:{session_id}"
|
||||
|
||||
|
||||
_DB_CACHE: Dict[str, Any] = {}
|
||||
|
||||
|
||||
def _get_session_db() -> Optional[Any]:
|
||||
"""Return a SessionDB instance for the current HERMES_HOME.
|
||||
|
||||
SessionDB has no built-in singleton, but opening a new connection per
|
||||
/goal call would thrash the file. We cache one instance per
|
||||
``hermes_home`` path so profile switches still pick up the right DB.
|
||||
Defensive against import/instantiation failures so tests and
|
||||
non-standard launchers can still use the GoalManager.
|
||||
"""
|
||||
try:
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_state import SessionDB
|
||||
|
||||
home = str(get_hermes_home())
|
||||
except Exception as exc: # pragma: no cover
|
||||
logger.debug("GoalManager: SessionDB bootstrap failed (%s)", exc)
|
||||
return None
|
||||
|
||||
cached = _DB_CACHE.get(home)
|
||||
if cached is not None:
|
||||
return cached
|
||||
try:
|
||||
db = SessionDB()
|
||||
except Exception as exc: # pragma: no cover
|
||||
logger.debug("GoalManager: SessionDB() raised (%s)", exc)
|
||||
return None
|
||||
_DB_CACHE[home] = db
|
||||
return db
|
||||
|
||||
|
||||
def load_goal(session_id: str) -> Optional[GoalState]:
|
||||
"""Load the goal for a session, or None if none exists."""
|
||||
if not session_id:
|
||||
return None
|
||||
db = _get_session_db()
|
||||
if db is None:
|
||||
return None
|
||||
try:
|
||||
raw = db.get_meta(_meta_key(session_id))
|
||||
except Exception as exc:
|
||||
logger.debug("GoalManager: get_meta failed: %s", exc)
|
||||
return None
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
return GoalState.from_json(raw)
|
||||
except Exception as exc:
|
||||
logger.warning("GoalManager: could not parse stored goal for %s: %s", session_id, exc)
|
||||
return None
|
||||
|
||||
|
||||
def save_goal(session_id: str, state: GoalState) -> None:
|
||||
"""Persist a goal to SessionDB. No-op if DB unavailable."""
|
||||
if not session_id:
|
||||
return
|
||||
db = _get_session_db()
|
||||
if db is None:
|
||||
return
|
||||
try:
|
||||
db.set_meta(_meta_key(session_id), state.to_json())
|
||||
except Exception as exc:
|
||||
logger.debug("GoalManager: set_meta failed: %s", exc)
|
||||
|
||||
|
||||
def clear_goal(session_id: str) -> None:
|
||||
"""Mark a goal cleared in the DB (preserved for audit, status=cleared)."""
|
||||
state = load_goal(session_id)
|
||||
if state is None:
|
||||
return
|
||||
state.status = "cleared"
|
||||
save_goal(session_id, state)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Judge
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _truncate(text: str, limit: int) -> str:
|
||||
if not text:
|
||||
return ""
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return text[:limit] + "… [truncated]"
|
||||
|
||||
|
||||
_JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL)
|
||||
|
||||
|
||||
def _parse_judge_response(raw: str) -> Tuple[bool, str]:
|
||||
"""Parse the judge's reply. Fail-open to ``(False, "<reason>")``.
|
||||
|
||||
Returns ``(done, reason)``.
|
||||
"""
|
||||
if not raw:
|
||||
return False, "judge returned empty response"
|
||||
|
||||
text = raw.strip()
|
||||
|
||||
# Strip markdown code fences the model may wrap JSON in.
|
||||
if text.startswith("```"):
|
||||
text = text.strip("`")
|
||||
# Peel off leading json/JSON/etc tag
|
||||
nl = text.find("\n")
|
||||
if nl != -1:
|
||||
text = text[nl + 1:]
|
||||
|
||||
# First try: parse the whole blob.
|
||||
data: Optional[Dict[str, Any]] = None
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except Exception:
|
||||
# Second try: pull the first JSON object out.
|
||||
match = _JSON_OBJECT_RE.search(text)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group(0))
|
||||
except Exception:
|
||||
data = None
|
||||
|
||||
if not isinstance(data, dict):
|
||||
return False, f"judge reply was not JSON: {_truncate(raw, 200)!r}"
|
||||
|
||||
done_val = data.get("done")
|
||||
if isinstance(done_val, str):
|
||||
done = done_val.strip().lower() in ("true", "yes", "1", "done")
|
||||
else:
|
||||
done = bool(done_val)
|
||||
reason = str(data.get("reason") or "").strip()
|
||||
if not reason:
|
||||
reason = "no reason provided"
|
||||
return done, reason
|
||||
|
||||
|
||||
def judge_goal(
|
||||
goal: str,
|
||||
last_response: str,
|
||||
*,
|
||||
timeout: float = DEFAULT_JUDGE_TIMEOUT,
|
||||
) -> Tuple[str, str]:
|
||||
"""Ask the auxiliary model whether the goal is satisfied.
|
||||
|
||||
Returns ``(verdict, reason)`` where verdict is ``"done"``, ``"continue"``,
|
||||
or ``"skipped"`` (when the judge couldn't be reached).
|
||||
|
||||
This is deliberately fail-open: any error returns ``("continue", "...")``
|
||||
so a broken judge doesn't wedge progress — the turn budget is the
|
||||
backstop.
|
||||
"""
|
||||
if not goal.strip():
|
||||
return "skipped", "empty goal"
|
||||
if not last_response.strip():
|
||||
# No substantive reply this turn — almost certainly not done yet.
|
||||
return "continue", "empty response (nothing to evaluate)"
|
||||
|
||||
try:
|
||||
from agent.auxiliary_client import get_text_auxiliary_client
|
||||
except Exception as exc:
|
||||
logger.debug("goal judge: auxiliary client import failed: %s", exc)
|
||||
return "continue", "auxiliary client unavailable"
|
||||
|
||||
try:
|
||||
client, model = get_text_auxiliary_client("goal_judge")
|
||||
except Exception as exc:
|
||||
logger.debug("goal judge: get_text_auxiliary_client failed: %s", exc)
|
||||
return "continue", "auxiliary client unavailable"
|
||||
|
||||
if client is None or not model:
|
||||
return "continue", "no auxiliary client configured"
|
||||
|
||||
prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
|
||||
goal=_truncate(goal, 2000),
|
||||
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
|
||||
)
|
||||
|
||||
try:
|
||||
resp = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": JUDGE_SYSTEM_PROMPT},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
temperature=0,
|
||||
max_tokens=200,
|
||||
timeout=timeout,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
|
||||
return "continue", f"judge error: {type(exc).__name__}"
|
||||
|
||||
try:
|
||||
raw = resp.choices[0].message.content or ""
|
||||
except Exception:
|
||||
raw = ""
|
||||
|
||||
done, reason = _parse_judge_response(raw)
|
||||
verdict = "done" if done else "continue"
|
||||
logger.info("goal judge: verdict=%s reason=%s", verdict, _truncate(reason, 120))
|
||||
return verdict, reason
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# GoalManager — the orchestration surface CLI + gateway talk to
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class GoalManager:
|
||||
"""Per-session goal state + continuation decisions.
|
||||
|
||||
The CLI and gateway each hold one ``GoalManager`` per live session.
|
||||
|
||||
Methods:
|
||||
|
||||
- ``set(goal)`` — start a new standing goal.
|
||||
- ``clear()`` — remove the active goal.
|
||||
- ``pause()`` / ``resume()`` — explicit user controls.
|
||||
- ``status()`` — printable one-liner.
|
||||
- ``evaluate_after_turn(last_response)`` — call the judge, update state,
|
||||
and return a decision dict the caller uses to drive the next turn.
|
||||
- ``next_continuation_prompt()`` — the canonical user-role message to
|
||||
feed back into ``run_conversation``.
|
||||
"""
|
||||
|
||||
def __init__(self, session_id: str, *, default_max_turns: int = DEFAULT_MAX_TURNS):
|
||||
self.session_id = session_id
|
||||
self.default_max_turns = int(default_max_turns or DEFAULT_MAX_TURNS)
|
||||
self._state: Optional[GoalState] = load_goal(session_id)
|
||||
|
||||
# --- introspection ------------------------------------------------
|
||||
|
||||
@property
|
||||
def state(self) -> Optional[GoalState]:
|
||||
return self._state
|
||||
|
||||
def is_active(self) -> bool:
|
||||
return self._state is not None and self._state.status == "active"
|
||||
|
||||
def has_goal(self) -> bool:
|
||||
return self._state is not None and self._state.status in ("active", "paused")
|
||||
|
||||
def status_line(self) -> str:
|
||||
s = self._state
|
||||
if s is None or s.status in ("cleared",):
|
||||
return "No active goal. Set one with /goal <text>."
|
||||
turns = f"{s.turns_used}/{s.max_turns} turns"
|
||||
if s.status == "active":
|
||||
return f"⊙ Goal (active, {turns}): {s.goal}"
|
||||
if s.status == "paused":
|
||||
extra = f" — {s.paused_reason}" if s.paused_reason else ""
|
||||
return f"⏸ Goal (paused, {turns}{extra}): {s.goal}"
|
||||
if s.status == "done":
|
||||
return f"✓ Goal done ({turns}): {s.goal}"
|
||||
return f"Goal ({s.status}, {turns}): {s.goal}"
|
||||
|
||||
# --- mutation -----------------------------------------------------
|
||||
|
||||
def set(self, goal: str, *, max_turns: Optional[int] = None) -> GoalState:
|
||||
goal = (goal or "").strip()
|
||||
if not goal:
|
||||
raise ValueError("goal text is empty")
|
||||
state = GoalState(
|
||||
goal=goal,
|
||||
status="active",
|
||||
turns_used=0,
|
||||
max_turns=int(max_turns) if max_turns else self.default_max_turns,
|
||||
created_at=time.time(),
|
||||
last_turn_at=0.0,
|
||||
)
|
||||
self._state = state
|
||||
save_goal(self.session_id, state)
|
||||
return state
|
||||
|
||||
def pause(self, reason: str = "user-paused") -> Optional[GoalState]:
|
||||
if not self._state:
|
||||
return None
|
||||
self._state.status = "paused"
|
||||
self._state.paused_reason = reason
|
||||
save_goal(self.session_id, self._state)
|
||||
return self._state
|
||||
|
||||
def resume(self, *, reset_budget: bool = True) -> Optional[GoalState]:
|
||||
if not self._state:
|
||||
return None
|
||||
self._state.status = "active"
|
||||
self._state.paused_reason = None
|
||||
if reset_budget:
|
||||
self._state.turns_used = 0
|
||||
save_goal(self.session_id, self._state)
|
||||
return self._state
|
||||
|
||||
def clear(self) -> None:
|
||||
if self._state is None:
|
||||
return
|
||||
self._state.status = "cleared"
|
||||
save_goal(self.session_id, self._state)
|
||||
self._state = None
|
||||
|
||||
def mark_done(self, reason: str) -> None:
|
||||
if not self._state:
|
||||
return
|
||||
self._state.status = "done"
|
||||
self._state.last_verdict = "done"
|
||||
self._state.last_reason = reason
|
||||
save_goal(self.session_id, self._state)
|
||||
|
||||
# --- the main entry point called after every turn -----------------
|
||||
|
||||
def evaluate_after_turn(
|
||||
self,
|
||||
last_response: str,
|
||||
*,
|
||||
user_initiated: bool = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""Run the judge and update state. Return a decision dict.
|
||||
|
||||
``user_initiated`` distinguishes a real user prompt (True) from a
|
||||
continuation prompt we fed ourselves (False). Both increment
|
||||
``turns_used`` because both consume model budget.
|
||||
|
||||
Decision keys:
|
||||
- ``status``: current goal status after update
|
||||
- ``should_continue``: bool — caller should fire another turn
|
||||
- ``continuation_prompt``: str or None
|
||||
- ``verdict``: "done" | "continue" | "skipped" | "inactive"
|
||||
- ``reason``: str
|
||||
- ``message``: user-visible one-liner to print/send
|
||||
"""
|
||||
state = self._state
|
||||
if state is None or state.status != "active":
|
||||
return {
|
||||
"status": state.status if state else None,
|
||||
"should_continue": False,
|
||||
"continuation_prompt": None,
|
||||
"verdict": "inactive",
|
||||
"reason": "no active goal",
|
||||
"message": "",
|
||||
}
|
||||
|
||||
# Count the turn that just finished.
|
||||
state.turns_used += 1
|
||||
state.last_turn_at = time.time()
|
||||
|
||||
verdict, reason = judge_goal(state.goal, last_response)
|
||||
state.last_verdict = verdict
|
||||
state.last_reason = reason
|
||||
|
||||
if verdict == "done":
|
||||
state.status = "done"
|
||||
save_goal(self.session_id, state)
|
||||
return {
|
||||
"status": "done",
|
||||
"should_continue": False,
|
||||
"continuation_prompt": None,
|
||||
"verdict": "done",
|
||||
"reason": reason,
|
||||
"message": f"✓ Goal achieved: {reason}",
|
||||
}
|
||||
|
||||
if state.turns_used >= state.max_turns:
|
||||
state.status = "paused"
|
||||
state.paused_reason = f"turn budget exhausted ({state.turns_used}/{state.max_turns})"
|
||||
save_goal(self.session_id, state)
|
||||
return {
|
||||
"status": "paused",
|
||||
"should_continue": False,
|
||||
"continuation_prompt": None,
|
||||
"verdict": "continue",
|
||||
"reason": reason,
|
||||
"message": (
|
||||
f"⏸ Goal paused — {state.turns_used}/{state.max_turns} turns used. "
|
||||
"Use /goal resume to keep going, or /goal clear to stop."
|
||||
),
|
||||
}
|
||||
|
||||
save_goal(self.session_id, state)
|
||||
return {
|
||||
"status": "active",
|
||||
"should_continue": True,
|
||||
"continuation_prompt": self.next_continuation_prompt(),
|
||||
"verdict": "continue",
|
||||
"reason": reason,
|
||||
"message": (
|
||||
f"↻ Continuing toward goal ({state.turns_used}/{state.max_turns}): {reason}"
|
||||
),
|
||||
}
|
||||
|
||||
def next_continuation_prompt(self) -> Optional[str]:
|
||||
if not self._state or self._state.status != "active":
|
||||
return None
|
||||
return CONTINUATION_PROMPT_TEMPLATE.format(goal=self._state.goal)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"GoalState",
|
||||
"GoalManager",
|
||||
"CONTINUATION_PROMPT_TEMPLATE",
|
||||
"DEFAULT_MAX_TURNS",
|
||||
"load_goal",
|
||||
"save_goal",
|
||||
"clear_goal",
|
||||
"judge_goal",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,649 @@
|
||||
"""Kanban diagnostics — structured, actionable distress signals for tasks.
|
||||
|
||||
A ``Diagnostic`` is a machine-readable description of something that's wrong
|
||||
with a kanban task: a hallucinated card id, a spawn crash-loop, a task
|
||||
stuck blocked for too long, etc. Each one carries:
|
||||
|
||||
* A **kind** (canonical code; UI/tests match on this).
|
||||
* A **severity** (``warning`` / ``error`` / ``critical``).
|
||||
* A **title** (one-line human description) and **detail** (longer text).
|
||||
* A list of **suggested actions** — structured entries the dashboard
|
||||
turns into buttons and the CLI turns into hints.
|
||||
|
||||
Rules run over (task, recent events, recent runs) and emit diagnostics.
|
||||
They are stateless and read-only — no DB writes. Callers compute
|
||||
diagnostics on demand (on ``/board`` load, ``/tasks/:id`` fetch, or
|
||||
``hermes kanban diagnostics``).
|
||||
|
||||
Design goals:
|
||||
|
||||
* Fixable-on-the-operator's-side signals only (missing config, phantom
|
||||
ids, crash loop). Not "the provider returned 502 once" — that's a
|
||||
transient runtime blip, not a diagnostic.
|
||||
* Recoverable: every diagnostic comes with at least one suggested
|
||||
recovery action the operator can actually take from the UI.
|
||||
* Auto-clearing: when the underlying failure mode resolves (a clean
|
||||
``completed`` event arrives, a spawn succeeds, the task gets
|
||||
unblocked), the diagnostic stops firing. The audit event trail stays.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Iterable, Optional
|
||||
import json
|
||||
import time
|
||||
|
||||
|
||||
# Severity rungs, ordered least → most urgent. The UI colors them
|
||||
# amber (warning), orange (error), red (critical). Sorted outputs put
|
||||
# critical first so operators see the worst fires at the top.
|
||||
SEVERITY_ORDER = ("warning", "error", "critical")
|
||||
|
||||
|
||||
@dataclass
|
||||
class DiagnosticAction:
|
||||
"""A single recovery action attached to a diagnostic.
|
||||
|
||||
The ``kind`` determines how both the UI and CLI render it:
|
||||
|
||||
* ``reclaim`` / ``reassign`` — POST to the matching /tasks/:id/*
|
||||
endpoint; dashboard wires into the existing recovery popover.
|
||||
* ``unblock`` — PATCH status back to ``ready`` (for stuck-blocked
|
||||
diagnostics).
|
||||
* ``cli_hint`` — print/copy a shell command (e.g.
|
||||
``hermes -p <profile> auth``). No HTTP side effect.
|
||||
* ``open_docs`` — deep-link to the docs URL named in ``payload.url``.
|
||||
* ``comment`` — nudge the operator to add a comment (for
|
||||
stuck-blocked tasks that need human input).
|
||||
|
||||
``suggested=True`` marks the action as the recommended first step;
|
||||
the UI highlights it. Multiple actions can be suggested if they're
|
||||
equally valid.
|
||||
"""
|
||||
|
||||
kind: str
|
||||
label: str
|
||||
payload: dict = field(default_factory=dict)
|
||||
suggested: bool = False
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"kind": self.kind,
|
||||
"label": self.label,
|
||||
"payload": self.payload,
|
||||
"suggested": self.suggested,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Diagnostic:
|
||||
"""One active distress signal on a task."""
|
||||
|
||||
kind: str
|
||||
severity: str # "warning" | "error" | "critical"
|
||||
title: str
|
||||
detail: str
|
||||
actions: list[DiagnosticAction] = field(default_factory=list)
|
||||
first_seen_at: int = 0
|
||||
last_seen_at: int = 0
|
||||
count: int = 1
|
||||
# Optional: the run id this diagnostic is scoped to. None = task-wide.
|
||||
run_id: Optional[int] = None
|
||||
# Optional structured payload for the UI (phantom ids, failure count).
|
||||
data: dict = field(default_factory=dict)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"kind": self.kind,
|
||||
"severity": self.severity,
|
||||
"title": self.title,
|
||||
"detail": self.detail,
|
||||
"actions": [a.to_dict() for a in self.actions],
|
||||
"first_seen_at": self.first_seen_at,
|
||||
"last_seen_at": self.last_seen_at,
|
||||
"count": self.count,
|
||||
"run_id": self.run_id,
|
||||
"data": self.data,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rule helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _task_field(task, name, default=None):
|
||||
"""Read a field from a task regardless of representation.
|
||||
|
||||
Callers pass sqlite3.Row (dict-like with [] but no attribute
|
||||
access), kanban_db.Task dataclasses (attribute access), or plain
|
||||
dicts (both). This normalises them so rule functions don't have
|
||||
to branch on type each time.
|
||||
"""
|
||||
if task is None:
|
||||
return default
|
||||
# sqlite Row + plain dicts both support mapping access; Row also
|
||||
# supports .keys().
|
||||
try:
|
||||
# Row raises IndexError if the key isn't a column in the query;
|
||||
# dicts return default via .get. Handle both.
|
||||
if hasattr(task, "keys") and name in task.keys():
|
||||
return task[name]
|
||||
except Exception:
|
||||
pass
|
||||
if isinstance(task, dict):
|
||||
return task.get(name, default)
|
||||
return getattr(task, name, default)
|
||||
|
||||
|
||||
def _parse_payload(ev) -> dict:
|
||||
"""Tolerate event.payload being either a dict or a JSON string."""
|
||||
p = _task_field(ev, "payload", None)
|
||||
if p is None:
|
||||
return {}
|
||||
if isinstance(p, dict):
|
||||
return p
|
||||
if isinstance(p, str):
|
||||
try:
|
||||
return json.loads(p) or {}
|
||||
except Exception:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _event_kind(ev) -> str:
|
||||
return _task_field(ev, "kind", "") or ""
|
||||
|
||||
|
||||
def _event_ts(ev) -> int:
|
||||
t = _task_field(ev, "created_at", 0)
|
||||
return int(t or 0)
|
||||
|
||||
|
||||
def _active_hallucination_events(
|
||||
events: Iterable[Any],
|
||||
kind: str,
|
||||
) -> list[Any]:
|
||||
"""Return events of ``kind`` that have no ``completed``/``edited``
|
||||
event *strictly after* them. Walks chronologically: each clean
|
||||
event resets the accumulator; each matching event gets appended.
|
||||
|
||||
Events must be sorted by id (i.e. arrival order); callers pass the
|
||||
task's full event list which the DB already returns in that order.
|
||||
"""
|
||||
# Events arrive sorted by id asc (chronological). Walk once, track
|
||||
# which hallucination events are still "active" (no clean event
|
||||
# supersedes them).
|
||||
active: list[Any] = []
|
||||
for ev in events:
|
||||
k = _event_kind(ev)
|
||||
if k in ("completed", "edited"):
|
||||
active.clear()
|
||||
elif k == kind:
|
||||
active.append(ev)
|
||||
return active
|
||||
|
||||
|
||||
def _latest_clean_event_ts(events: Iterable[Any]) -> int:
|
||||
"""Timestamp of the most recent clean completion / edit event.
|
||||
|
||||
Kept for general "has this task ever been successfully completed"
|
||||
lookups; hallucination rules use ``_active_hallucination_events``
|
||||
instead because they need strict ordering.
|
||||
"""
|
||||
latest = 0
|
||||
for ev in events:
|
||||
if _event_kind(ev) in ("completed", "edited"):
|
||||
t = _event_ts(ev)
|
||||
if t > latest:
|
||||
latest = t
|
||||
return latest
|
||||
|
||||
|
||||
# Standard always-available actions. Every diagnostic can offer these as
|
||||
# fallbacks regardless of kind — they're the two baseline recovery
|
||||
# primitives the kernel supports.
|
||||
def _generic_recovery_actions(task: Any, *, running: bool) -> list[DiagnosticAction]:
|
||||
out: list[DiagnosticAction] = []
|
||||
if running:
|
||||
out.append(DiagnosticAction(
|
||||
kind="reclaim",
|
||||
label="Reclaim task",
|
||||
payload={},
|
||||
))
|
||||
out.append(DiagnosticAction(
|
||||
kind="reassign",
|
||||
label="Reassign to different profile",
|
||||
payload={"reclaim_first": running},
|
||||
))
|
||||
return out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rule implementations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Each rule takes (task, events, runs, now_ts, config) and returns
|
||||
# zero or more Diagnostic instances. ``events`` / ``runs`` are lists of
|
||||
# kanban_db.Event / kanban_db.Run (or plain dicts matching the same
|
||||
# shape — for test convenience).
|
||||
|
||||
RuleFn = Callable[[Any, list[Any], list[Any], int, dict], list[Diagnostic]]
|
||||
|
||||
|
||||
def _rule_hallucinated_cards(task, events, runs, now, cfg) -> list[Diagnostic]:
|
||||
"""Blocked-hallucination gate fires: a worker called kanban_complete
|
||||
with created_cards that didn't exist or weren't created by the
|
||||
completing profile. Task stayed in its prior state; the operator
|
||||
needs to decide how to proceed.
|
||||
|
||||
Auto-clears when a successful completion (or edit) follows the
|
||||
blocked event.
|
||||
"""
|
||||
hits = _active_hallucination_events(events, "completion_blocked_hallucination")
|
||||
if not hits:
|
||||
return []
|
||||
phantom_ids: list[str] = []
|
||||
first = _event_ts(hits[0])
|
||||
last = _event_ts(hits[-1])
|
||||
for ev in hits:
|
||||
payload = _parse_payload(ev)
|
||||
for pid in payload.get("phantom_cards", []) or []:
|
||||
if pid not in phantom_ids:
|
||||
phantom_ids.append(pid)
|
||||
running = _task_field(task, "status") == "running"
|
||||
actions: list[DiagnosticAction] = []
|
||||
actions.append(DiagnosticAction(
|
||||
kind="comment",
|
||||
label="Add a comment explaining what to do",
|
||||
suggested=False,
|
||||
))
|
||||
actions.extend(_generic_recovery_actions(task, running=running))
|
||||
return [Diagnostic(
|
||||
kind="hallucinated_cards",
|
||||
severity="error",
|
||||
title="Worker claimed cards that don't exist",
|
||||
detail=(
|
||||
f"The completing worker declared created_cards that either didn't "
|
||||
f"exist or weren't created by its profile. The completion was "
|
||||
f"blocked and the task stayed in its prior state. "
|
||||
f"Usually means the worker hallucinated ids instead of capturing "
|
||||
f"return values from kanban_create."
|
||||
),
|
||||
actions=actions,
|
||||
first_seen_at=first,
|
||||
last_seen_at=last,
|
||||
count=len(hits),
|
||||
data={"phantom_ids": phantom_ids},
|
||||
)]
|
||||
|
||||
|
||||
def _rule_prose_phantom_refs(task, events, runs, now, cfg) -> list[Diagnostic]:
|
||||
"""Advisory prose-scan: the completion summary mentions ``t_<hex>``
|
||||
ids that don't resolve. Non-blocking; surfaced as a warning only.
|
||||
|
||||
Auto-clears when a fresh clean completion arrives AFTER the
|
||||
suspected event.
|
||||
"""
|
||||
hits = _active_hallucination_events(events, "suspected_hallucinated_references")
|
||||
if not hits:
|
||||
return []
|
||||
phantom_refs: list[str] = []
|
||||
for ev in hits:
|
||||
for pid in _parse_payload(ev).get("phantom_refs", []) or []:
|
||||
if pid not in phantom_refs:
|
||||
phantom_refs.append(pid)
|
||||
running = _task_field(task, "status") == "running"
|
||||
return [Diagnostic(
|
||||
kind="prose_phantom_refs",
|
||||
severity="warning",
|
||||
title="Completion summary references unknown task ids",
|
||||
detail=(
|
||||
"The completion summary mentions task ids that don't resolve "
|
||||
"in this board's database. The completion itself succeeded, "
|
||||
"but downstream consumers parsing the summary may be pointed "
|
||||
"at cards that never existed."
|
||||
),
|
||||
actions=_generic_recovery_actions(task, running=running),
|
||||
first_seen_at=_event_ts(hits[0]),
|
||||
last_seen_at=_event_ts(hits[-1]),
|
||||
count=len(hits),
|
||||
data={"phantom_refs": phantom_refs},
|
||||
)]
|
||||
|
||||
|
||||
def _rule_repeated_failures(task, events, runs, now, cfg) -> list[Diagnostic]:
|
||||
"""Task's unified ``consecutive_failures`` counter is climbing —
|
||||
something about this task+profile combo is broken and each retry
|
||||
fails the same way. Triggers regardless of the specific failure
|
||||
mode (spawn error, timeout, crash) because operationally they
|
||||
all look the same: the kernel keeps retrying and the operator
|
||||
needs to intervene.
|
||||
|
||||
Threshold: cfg["failure_threshold"] (default 3). A threshold of 3
|
||||
is one below the circuit-breaker's default (5), so the diagnostic
|
||||
surfaces BEFORE the breaker trips — giving operators a window to
|
||||
fix the problem while the dispatcher's still retrying.
|
||||
|
||||
Accepts the legacy ``spawn_failure_threshold`` config key for
|
||||
back-compat.
|
||||
"""
|
||||
threshold = int(cfg.get(
|
||||
"failure_threshold",
|
||||
cfg.get("spawn_failure_threshold", 3),
|
||||
))
|
||||
# Read the new unified counter name, with a fallback to the legacy
|
||||
# column name so this rule keeps working against old DB rows the
|
||||
# caller somehow materialised without running the migration.
|
||||
failures = (
|
||||
_task_field(task, "consecutive_failures", None)
|
||||
if _task_field(task, "consecutive_failures", None) is not None
|
||||
else _task_field(task, "spawn_failures", 0)
|
||||
)
|
||||
if failures is None or failures < threshold:
|
||||
return []
|
||||
last_err = (
|
||||
_task_field(task, "last_failure_error", None)
|
||||
if _task_field(task, "last_failure_error", None) is not None
|
||||
else _task_field(task, "last_spawn_error", None)
|
||||
)
|
||||
assignee = _task_field(task, "assignee")
|
||||
|
||||
# Classify the most recent failure by peeking at run outcomes so
|
||||
# the title + suggested action can be specific without a separate
|
||||
# per-outcome rule.
|
||||
ordered_runs = sorted(runs, key=lambda r: _task_field(r, "id", 0))
|
||||
most_recent_outcome = None
|
||||
for r in reversed(ordered_runs):
|
||||
oc = _task_field(r, "outcome")
|
||||
if oc in ("spawn_failed", "timed_out", "crashed"):
|
||||
most_recent_outcome = oc
|
||||
break
|
||||
|
||||
actions: list[DiagnosticAction] = []
|
||||
if most_recent_outcome == "spawn_failed" and assignee and assignee != "default":
|
||||
# Spawn is failing specifically — profile setup issue.
|
||||
actions.append(DiagnosticAction(
|
||||
kind="cli_hint",
|
||||
label=f"Verify profile: hermes -p {assignee} doctor",
|
||||
payload={"command": f"hermes -p {assignee} doctor"},
|
||||
suggested=True,
|
||||
))
|
||||
actions.append(DiagnosticAction(
|
||||
kind="cli_hint",
|
||||
label=f"Fix profile auth: hermes -p {assignee} auth",
|
||||
payload={"command": f"hermes -p {assignee} auth"},
|
||||
))
|
||||
elif most_recent_outcome in ("timed_out", "crashed"):
|
||||
# Worker got off the ground but died. Logs are the right place
|
||||
# to diagnose; reclaim/reassign are the recovery levers.
|
||||
task_id = _task_field(task, "id")
|
||||
if task_id:
|
||||
actions.append(DiagnosticAction(
|
||||
kind="cli_hint",
|
||||
label=f"Check logs: hermes kanban log {task_id}",
|
||||
payload={"command": f"hermes kanban log {task_id}"},
|
||||
suggested=True,
|
||||
))
|
||||
actions.extend(_generic_recovery_actions(
|
||||
task, running=_task_field(task, "status") == "running",
|
||||
))
|
||||
|
||||
severity = "critical" if failures >= threshold * 2 else "error"
|
||||
err_text = (last_err or "").strip() if last_err else ""
|
||||
err_snippet = err_text[:500] + ("…" if len(err_text) > 500 else "") if err_text else ""
|
||||
outcome_label = {
|
||||
"spawn_failed": "spawn",
|
||||
"timed_out": "timeout",
|
||||
"crashed": "crash",
|
||||
}.get(most_recent_outcome or "", "failure")
|
||||
if err_snippet:
|
||||
title = f"Agent {outcome_label} x{failures}: {err_snippet.splitlines()[0][:160]}"
|
||||
detail = (
|
||||
f"This task has failed {failures} times in a row "
|
||||
f"(most recent: {outcome_label}). Full last error:\n\n"
|
||||
f"{err_snippet}\n\n"
|
||||
f"The dispatcher will keep retrying until the consecutive-"
|
||||
f"failures counter trips the circuit breaker (default 5), "
|
||||
f"at which point the task auto-blocks. Fix the root cause "
|
||||
f"and reclaim to retry."
|
||||
)
|
||||
else:
|
||||
title = f"Agent {outcome_label} x{failures} (no error recorded)"
|
||||
detail = (
|
||||
f"This task has failed {failures} times in a row "
|
||||
f"(most recent: {outcome_label}) but no error text was "
|
||||
f"captured. Check the suggested command or the worker log."
|
||||
)
|
||||
return [Diagnostic(
|
||||
kind="repeated_failures",
|
||||
severity=severity,
|
||||
title=title,
|
||||
detail=detail,
|
||||
actions=actions,
|
||||
first_seen_at=now,
|
||||
last_seen_at=now,
|
||||
count=failures,
|
||||
data={
|
||||
"consecutive_failures": failures,
|
||||
"most_recent_outcome": most_recent_outcome,
|
||||
"last_error": last_err,
|
||||
},
|
||||
)]
|
||||
|
||||
|
||||
def _rule_repeated_crashes(task, events, runs, now, cfg) -> list[Diagnostic]:
|
||||
"""The worker spawns fine but keeps crashing mid-run. Check the last
|
||||
N runs' outcomes; N consecutive ``crashed`` without a successful
|
||||
``completed`` means something about the task + profile combo is
|
||||
broken (OOM, missing dependency, tool it needs is down).
|
||||
|
||||
Threshold: cfg["crash_threshold"] (default 2).
|
||||
|
||||
Narrower than ``repeated_failures`` — fires earlier (2 crashes vs 3
|
||||
total failures) so the operator gets a crash-specific heads-up
|
||||
before the unified rule kicks in. Suppresses itself when the
|
||||
unified rule is also about to fire, to avoid double-flagging.
|
||||
"""
|
||||
failure_threshold = int(cfg.get(
|
||||
"failure_threshold",
|
||||
cfg.get("spawn_failure_threshold", 3),
|
||||
))
|
||||
unified_counter = (
|
||||
_task_field(task, "consecutive_failures", 0) or 0
|
||||
)
|
||||
# Unified rule will catch this — let it handle to avoid double fire.
|
||||
if unified_counter >= failure_threshold:
|
||||
return []
|
||||
|
||||
threshold = int(cfg.get("crash_threshold", 2))
|
||||
ordered = sorted(runs, key=lambda r: _task_field(r, "id", 0))
|
||||
# Count trailing consecutive 'crashed' outcomes.
|
||||
consecutive = 0
|
||||
last_err = None
|
||||
for r in reversed(ordered):
|
||||
outcome = _task_field(r, "outcome")
|
||||
if outcome == "crashed":
|
||||
consecutive += 1
|
||||
if last_err is None:
|
||||
last_err = _task_field(r, "error")
|
||||
elif outcome in ("completed", "reclaimed"):
|
||||
# A success (or manual reclaim) breaks the streak.
|
||||
break
|
||||
else:
|
||||
# Other outcomes (timed_out, blocked, spawn_failed, gave_up)
|
||||
# aren't crash signals — don't count them, but they also
|
||||
# don't break the crash streak.
|
||||
continue
|
||||
if consecutive < threshold:
|
||||
return []
|
||||
task_id = _task_field(task, "id")
|
||||
actions: list[DiagnosticAction] = []
|
||||
if task_id:
|
||||
actions.append(DiagnosticAction(
|
||||
kind="cli_hint",
|
||||
label=f"Check logs: hermes kanban log {task_id}",
|
||||
payload={"command": f"hermes kanban log {task_id}"},
|
||||
suggested=True,
|
||||
))
|
||||
running = _task_field(task, "status") == "running"
|
||||
actions.extend(_generic_recovery_actions(task, running=running))
|
||||
severity = "critical" if consecutive >= threshold * 2 else "error"
|
||||
# Put the actual error up-front so operators see WHAT broke without
|
||||
# having to open the logs. Truncate defensively — these can be huge
|
||||
# (full tracebacks).
|
||||
err_text = (last_err or "").strip() if last_err else ""
|
||||
err_snippet = err_text[:500] + ("…" if len(err_text) > 500 else "") if err_text else ""
|
||||
if err_snippet:
|
||||
title = f"Agent crashed {consecutive}x: {err_snippet.splitlines()[0][:160]}"
|
||||
detail = (
|
||||
f"The last {consecutive} runs ended with outcome=crashed. "
|
||||
f"Full last error:\n\n{err_snippet}"
|
||||
)
|
||||
else:
|
||||
title = f"Agent crashed {consecutive}x (no error recorded)"
|
||||
detail = (
|
||||
f"The last {consecutive} runs ended with outcome=crashed but "
|
||||
f"no error text was captured. Check the worker log for more."
|
||||
)
|
||||
return [Diagnostic(
|
||||
kind="repeated_crashes",
|
||||
severity=severity,
|
||||
title=title,
|
||||
detail=detail,
|
||||
actions=actions,
|
||||
first_seen_at=now,
|
||||
last_seen_at=now,
|
||||
count=consecutive,
|
||||
data={"consecutive_crashes": consecutive, "last_error": last_err},
|
||||
)]
|
||||
|
||||
|
||||
def _rule_stuck_in_blocked(task, events, runs, now, cfg) -> list[Diagnostic]:
|
||||
"""Task has been in ``blocked`` status for too long without a comment.
|
||||
|
||||
Threshold: cfg["blocked_stale_hours"] (default 24).
|
||||
Surfaced as a warning so humans know there's a pending unblock.
|
||||
"""
|
||||
hours = float(cfg.get("blocked_stale_hours", 24))
|
||||
status = _task_field(task, "status")
|
||||
if status != "blocked":
|
||||
return []
|
||||
# Find the most recent ``blocked`` event.
|
||||
last_blocked_ts = 0
|
||||
for ev in events:
|
||||
if _event_kind(ev) == "blocked":
|
||||
t = _event_ts(ev)
|
||||
if t > last_blocked_ts:
|
||||
last_blocked_ts = t
|
||||
if last_blocked_ts == 0:
|
||||
return []
|
||||
age_hours = (now - last_blocked_ts) / 3600.0
|
||||
if age_hours < hours:
|
||||
return []
|
||||
# Any comment / unblock after the block breaks the "stale" signal.
|
||||
for ev in events:
|
||||
if _event_kind(ev) in ("commented", "unblocked") and _event_ts(ev) > last_blocked_ts:
|
||||
return []
|
||||
actions: list[DiagnosticAction] = [
|
||||
DiagnosticAction(
|
||||
kind="comment",
|
||||
label="Add a comment / unblock the task",
|
||||
suggested=True,
|
||||
),
|
||||
]
|
||||
return [Diagnostic(
|
||||
kind="stuck_in_blocked",
|
||||
severity="warning",
|
||||
title=f"Task has been blocked for {int(age_hours)}h",
|
||||
detail=(
|
||||
f"This task transitioned to blocked {int(age_hours)}h ago and "
|
||||
f"has had no comments or unblock attempts since. Blocked tasks "
|
||||
f"are waiting for human input — check the block reason and "
|
||||
f"either unblock with feedback or answer with a comment."
|
||||
),
|
||||
actions=actions,
|
||||
first_seen_at=last_blocked_ts,
|
||||
last_seen_at=last_blocked_ts,
|
||||
count=1,
|
||||
data={"blocked_at": last_blocked_ts, "age_hours": round(age_hours, 1)},
|
||||
)]
|
||||
|
||||
|
||||
# Registry — order matters: rules higher on the list render first when
|
||||
# severity ties. Add new rules here.
|
||||
_RULES: list[RuleFn] = [
|
||||
_rule_hallucinated_cards,
|
||||
_rule_prose_phantom_refs,
|
||||
_rule_repeated_failures,
|
||||
_rule_repeated_crashes,
|
||||
_rule_stuck_in_blocked,
|
||||
]
|
||||
|
||||
|
||||
# Known kinds (for the UI's filter / legend / i18n keys). Update when
|
||||
# rules are added.
|
||||
DIAGNOSTIC_KINDS = (
|
||||
"hallucinated_cards",
|
||||
"prose_phantom_refs",
|
||||
"repeated_failures",
|
||||
"repeated_crashes",
|
||||
"stuck_in_blocked",
|
||||
)
|
||||
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
"failure_threshold": 3,
|
||||
# Legacy alias accepted at read time by _rule_repeated_failures.
|
||||
"spawn_failure_threshold": 3,
|
||||
"crash_threshold": 2,
|
||||
"blocked_stale_hours": 24,
|
||||
}
|
||||
|
||||
|
||||
def compute_task_diagnostics(
|
||||
task,
|
||||
events: list,
|
||||
runs: list,
|
||||
*,
|
||||
now: Optional[int] = None,
|
||||
config: Optional[dict] = None,
|
||||
) -> list[Diagnostic]:
|
||||
"""Run every rule against a single task's state and return a
|
||||
severity-sorted list of active diagnostics.
|
||||
|
||||
Sorting: critical first, then error, then warning; ties broken by
|
||||
most-recent ``last_seen_at``.
|
||||
"""
|
||||
now_ts = int(now if now is not None else time.time())
|
||||
cfg = {**DEFAULT_CONFIG, **(config or {})}
|
||||
out: list[Diagnostic] = []
|
||||
for rule in _RULES:
|
||||
try:
|
||||
out.extend(rule(task, events, runs, now_ts, cfg))
|
||||
except Exception:
|
||||
# A broken rule must never crash the dashboard. Rule bugs
|
||||
# get caught in tests; in production we'd rather drop the
|
||||
# diagnostic than 500 a whole /board request.
|
||||
continue
|
||||
severity_idx = {s: i for i, s in enumerate(SEVERITY_ORDER)}
|
||||
out.sort(
|
||||
key=lambda d: (
|
||||
-severity_idx.get(d.severity, -1),
|
||||
-(d.last_seen_at or 0),
|
||||
)
|
||||
)
|
||||
return out
|
||||
|
||||
|
||||
def severity_of_highest(diagnostics: Iterable[Diagnostic]) -> Optional[str]:
|
||||
"""Highest severity present in the list, or None if empty. Useful
|
||||
for card badges that need a single color."""
|
||||
highest_idx = -1
|
||||
highest = None
|
||||
for d in diagnostics:
|
||||
idx = SEVERITY_ORDER.index(d.severity) if d.severity in SEVERITY_ORDER else -1
|
||||
if idx > highest_idx:
|
||||
highest_idx = idx
|
||||
highest = d.severity
|
||||
return highest
|
||||
+838
-253
File diff suppressed because it is too large
Load Diff
@@ -361,7 +361,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:
|
||||
|
||||
existing_lines = []
|
||||
if env_path.exists():
|
||||
existing_lines = env_path.read_text().splitlines()
|
||||
existing_lines = env_path.read_text(encoding="utf-8").splitlines()
|
||||
|
||||
updated_keys = set()
|
||||
new_lines = []
|
||||
|
||||
@@ -393,14 +393,21 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
||||
if provider in _AGGREGATOR_PROVIDERS:
|
||||
return _prepend_vendor(name)
|
||||
|
||||
# --- OpenCode Zen: Claude stays hyphenated; other models keep dots ---
|
||||
if provider == "opencode-zen":
|
||||
bare = _strip_matching_provider_prefix(name, provider)
|
||||
if "/" in bare:
|
||||
return bare
|
||||
if bare.lower().startswith("claude-"):
|
||||
return _dots_to_hyphens(bare)
|
||||
return bare
|
||||
# --- OpenCode Zen / OpenCode Go: flat-namespace resellers.
|
||||
# Their /v1/models API returns bare IDs only (no vendor prefix), and
|
||||
# the inference endpoint rejects vendor-prefixed names with HTTP 401
|
||||
# "Model not supported". Strip ANY leading ``vendor/`` so config
|
||||
# entries like ``minimax/minimax-m2.7`` or ``deepseek/deepseek-v4-flash``
|
||||
# — commonly copied from aggregator slugs into fallback_model lists —
|
||||
# resolve to bare ``minimax-m2.7`` / ``deepseek-v4-flash`` the API
|
||||
# actually serves. See PR reviewing opencode-go fallback 401s. ---
|
||||
if provider in {"opencode-zen", "opencode-go"}:
|
||||
if "/" in name:
|
||||
_, bare_after_slash = name.split("/", 1)
|
||||
name = bare_after_slash.strip() or name
|
||||
if provider == "opencode-zen" and name.lower().startswith("claude-"):
|
||||
return _dots_to_hyphens(name)
|
||||
return name
|
||||
|
||||
# --- Anthropic: strip matching provider prefix, dots -> hyphens ---
|
||||
if provider in _DOT_TO_HYPHEN_PROVIDERS:
|
||||
|
||||
+183
-26
@@ -190,11 +190,18 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
|
||||
model: "minimax-m2.7"
|
||||
provider: custom
|
||||
base_url: "https://ollama.com/v1"
|
||||
|
||||
Also reads ``model.aliases`` (set by ``hermes config set model.aliases.xxx``)
|
||||
and converts simple string entries (``ds-flash: deepseek/deepseek-v4-flash``)
|
||||
into DirectAlias objects. The provider is parsed from the ``provider/``
|
||||
prefix in the value; if no slash, the current provider is used.
|
||||
"""
|
||||
merged = dict(_BUILTIN_DIRECT_ALIASES)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
|
||||
# --- model_aliases (dict-based format) ---
|
||||
user_aliases = cfg.get("model_aliases")
|
||||
if isinstance(user_aliases, dict):
|
||||
for name, entry in user_aliases.items():
|
||||
@@ -207,6 +214,30 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
|
||||
merged[name.strip().lower()] = DirectAlias(
|
||||
model=model, provider=provider, base_url=base_url,
|
||||
)
|
||||
|
||||
# --- model.aliases (string-based format, from config set) ---
|
||||
model_section = cfg.get("model", {})
|
||||
if isinstance(model_section, dict):
|
||||
simple_aliases = model_section.get("aliases")
|
||||
if isinstance(simple_aliases, dict):
|
||||
current_provider = model_section.get("provider", "")
|
||||
for name, value in simple_aliases.items():
|
||||
if not isinstance(value, str) or not value.strip():
|
||||
continue
|
||||
key = name.strip().lower()
|
||||
if key in merged:
|
||||
continue # don't override explicit model_aliases entries
|
||||
val = value.strip()
|
||||
if "/" in val:
|
||||
provider, model = val.split("/", 1)
|
||||
else:
|
||||
provider = current_provider
|
||||
model = val
|
||||
merged[key] = DirectAlias(
|
||||
model=model.strip(),
|
||||
provider=provider.strip() or current_provider,
|
||||
base_url="",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return merged
|
||||
@@ -768,6 +799,12 @@ def switch_model(
|
||||
)
|
||||
|
||||
# --- Step d: Aggregator catalog search ---
|
||||
# Track whether the live catalog of the CURRENT provider resolved the
|
||||
# model — if so, step e must not second-guess and switch providers.
|
||||
# Critical for flat-namespace resellers like opencode-go / opencode-zen
|
||||
# whose live /v1/models returns bare IDs (e.g. "deepseek-v4-flash") that
|
||||
# coincidentally match entries in native providers' static catalogs.
|
||||
resolved_in_current_catalog = False
|
||||
if is_aggregator(target_provider) and not resolved_alias:
|
||||
catalog = list_provider_models(target_provider)
|
||||
if catalog:
|
||||
@@ -775,6 +812,7 @@ def switch_model(
|
||||
for mid in catalog:
|
||||
if mid.lower() == new_model_lower:
|
||||
new_model = mid
|
||||
resolved_in_current_catalog = True
|
||||
break
|
||||
else:
|
||||
for mid in catalog:
|
||||
@@ -782,6 +820,7 @@ def switch_model(
|
||||
_, bare = mid.split("/", 1)
|
||||
if bare.lower() == new_model_lower:
|
||||
new_model = mid
|
||||
resolved_in_current_catalog = True
|
||||
break
|
||||
|
||||
# --- Step e: detect_provider_for_model() as last resort ---
|
||||
@@ -794,6 +833,7 @@ def switch_model(
|
||||
target_provider == current_provider
|
||||
and not is_custom
|
||||
and not resolved_alias
|
||||
and not resolved_in_current_catalog
|
||||
):
|
||||
detected = detect_provider_for_model(new_model, current_provider)
|
||||
if detected:
|
||||
@@ -891,12 +931,37 @@ def switch_model(
|
||||
if not validation.get("accepted"):
|
||||
override = False
|
||||
if user_providers:
|
||||
for up in user_providers:
|
||||
if isinstance(up, dict) and up.get("provider") == target_provider:
|
||||
cfg_models = up.get("models", [])
|
||||
if new_model in cfg_models or any(
|
||||
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
|
||||
):
|
||||
# user_providers is a dict: {provider_slug: config_dict}
|
||||
for slug, cfg in user_providers.items():
|
||||
if slug == target_provider:
|
||||
cfg_models = cfg.get("models", {})
|
||||
# Direct membership works for dict (keys) and list (strings)
|
||||
if new_model in cfg_models:
|
||||
override = True
|
||||
break
|
||||
# Also accept if models is a list of dicts with 'name' field
|
||||
if isinstance(cfg_models, list):
|
||||
if any(m.get("name") == new_model for m in cfg_models if isinstance(m, dict)):
|
||||
override = True
|
||||
break
|
||||
# Also check custom_providers list — models declared there should be accepted
|
||||
# even if the remote /v1/models endpoint doesn't list them.
|
||||
if not override and custom_providers and isinstance(custom_providers, list):
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
# Match by provider slug (custom:<name>) or by base_url
|
||||
entry_name = entry.get("name", "")
|
||||
entry_slug = f"custom:{entry_name}" if entry_name else ""
|
||||
entry_url = entry.get("base_url", "")
|
||||
if entry_slug == target_provider or entry_url == base_url:
|
||||
# Check if the requested model matches the entry's model
|
||||
entry_model = entry.get("model", "")
|
||||
entry_models = entry.get("models", {})
|
||||
if new_model == entry_model:
|
||||
override = True
|
||||
break
|
||||
if isinstance(entry_models, dict) and new_model in entry_models:
|
||||
override = True
|
||||
break
|
||||
if override:
|
||||
@@ -1052,6 +1117,45 @@ def list_authenticated_providers(
|
||||
if normed:
|
||||
_builtin_endpoints.add(normed)
|
||||
|
||||
def _has_fast_aws_sdk_signal() -> bool:
|
||||
"""Return True when explicit AWS auth config is present.
|
||||
|
||||
This intentionally avoids botocore's full credential chain. Provider
|
||||
picker/model-switch discovery can run for non-Bedrock providers, and
|
||||
botocore may otherwise probe EC2 IMDS (169.254.169.254) on local
|
||||
machines before returning no credentials.
|
||||
"""
|
||||
if os.environ.get("AWS_BEARER_TOKEN_BEDROCK", "").strip():
|
||||
return True
|
||||
if (
|
||||
os.environ.get("AWS_ACCESS_KEY_ID", "").strip()
|
||||
and os.environ.get("AWS_SECRET_ACCESS_KEY", "").strip()
|
||||
):
|
||||
return True
|
||||
return any(
|
||||
os.environ.get(name, "").strip()
|
||||
for name in (
|
||||
"AWS_PROFILE",
|
||||
"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
|
||||
"AWS_CONTAINER_CREDENTIALS_FULL_URI",
|
||||
"AWS_WEB_IDENTITY_TOKEN_FILE",
|
||||
)
|
||||
)
|
||||
|
||||
def _has_aws_sdk_creds_for_listing(slug: str) -> bool:
|
||||
"""Credential check for AWS SDK providers in non-runtime discovery."""
|
||||
slug_norm = str(slug or "").strip().lower()
|
||||
current_norm = str(current_provider or "").strip().lower()
|
||||
if _has_fast_aws_sdk_signal():
|
||||
return True
|
||||
if slug_norm != current_norm:
|
||||
return False
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials
|
||||
return bool(has_aws_credentials())
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
data = fetch_models_dev()
|
||||
|
||||
# Build curated model lists keyed by hermes provider ID
|
||||
@@ -1179,7 +1283,9 @@ def list_authenticated_providers(
|
||||
|
||||
# Check if credentials exist
|
||||
has_creds = False
|
||||
if overlay.extra_env_vars:
|
||||
if overlay.auth_type == "aws_sdk":
|
||||
has_creds = _has_aws_sdk_creds_for_listing(hermes_slug)
|
||||
elif overlay.extra_env_vars:
|
||||
has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
|
||||
# Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
|
||||
if not has_creds and overlay.auth_type == "api_key":
|
||||
@@ -1198,11 +1304,7 @@ def list_authenticated_providers(
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
providers_store = store.get("providers", {})
|
||||
pool_store = store.get("credential_pool", {})
|
||||
if store and (
|
||||
pid in providers_store or hermes_slug in providers_store
|
||||
or pid in pool_store or hermes_slug in pool_store
|
||||
):
|
||||
if store and (pid in providers_store or hermes_slug in providers_store):
|
||||
has_creds = True
|
||||
except Exception as exc:
|
||||
logger.debug("Auth store check failed for %s: %s", pid, exc)
|
||||
@@ -1298,11 +1400,7 @@ def list_authenticated_providers(
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
_cp_store = _load_auth_store()
|
||||
_cp_providers_store = _cp_store.get("providers", {})
|
||||
_cp_pool_store = _cp_store.get("credential_pool", {})
|
||||
if _cp_store and (
|
||||
_cp.slug in _cp_providers_store
|
||||
or _cp.slug in _cp_pool_store
|
||||
):
|
||||
if _cp_store and _cp.slug in _cp_providers_store:
|
||||
_cp_has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
@@ -1319,11 +1417,7 @@ def list_authenticated_providers(
|
||||
# credentials come from the boto3 credential chain (env vars,
|
||||
# ~/.aws/credentials, instance roles, etc.)
|
||||
if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials
|
||||
_cp_has_creds = has_aws_credentials()
|
||||
except Exception:
|
||||
pass
|
||||
_cp_has_creds = _has_aws_sdk_creds_for_listing(_cp.slug)
|
||||
|
||||
if not _cp_has_creds:
|
||||
continue
|
||||
@@ -1412,14 +1506,17 @@ def list_authenticated_providers(
|
||||
models_list = list(fb)
|
||||
|
||||
# Prefer the endpoint's live /models list when credentials are
|
||||
# available. This keeps OpenAI-compatible relays (for example CRS)
|
||||
# in sync when the server catalog changes without requiring the
|
||||
# user to mirror every model into config.yaml.
|
||||
# available, unless the provider explicitly opts out via
|
||||
# discover_models: false (e.g. dedicated endpoints that expose
|
||||
# the entire aggregator catalog via /models).
|
||||
api_key = str(ep_cfg.get("api_key", "") or "").strip()
|
||||
if not api_key:
|
||||
key_env = str(ep_cfg.get("key_env", "") or "").strip()
|
||||
api_key = os.environ.get(key_env, "").strip() if key_env else ""
|
||||
if api_url and api_key:
|
||||
discover = ep_cfg.get("discover_models", True)
|
||||
if isinstance(discover, str):
|
||||
discover = discover.lower() not in ("false", "no", "0")
|
||||
if api_url and api_key and discover:
|
||||
try:
|
||||
from hermes_cli.models import fetch_api_models
|
||||
live_models = fetch_api_models(api_key, api_url)
|
||||
@@ -1595,3 +1692,63 @@ def list_authenticated_providers(
|
||||
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def list_picker_providers(
|
||||
current_provider: str = "",
|
||||
current_base_url: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
max_models: int = 8,
|
||||
current_model: str = "",
|
||||
) -> List[dict]:
|
||||
"""Interactive-picker variant of :func:`list_authenticated_providers`.
|
||||
|
||||
Post-processes the base list so the ``/model`` picker (Telegram/Discord
|
||||
inline keyboards) only surfaces models that are actually callable in the
|
||||
current install:
|
||||
|
||||
- OpenRouter's model list is replaced with the output of
|
||||
:func:`hermes_cli.models.fetch_openrouter_models`, which filters the
|
||||
curated ``OPENROUTER_MODELS`` snapshot against the live OpenRouter
|
||||
catalog. IDs the live catalog no longer carries drop out, so the
|
||||
picker never offers a model the user can't call.
|
||||
- Provider rows whose model list ends up empty are dropped, except
|
||||
custom endpoints (``is_user_defined=True`` with an ``api_url``) where
|
||||
the user may supply their own model set through config.
|
||||
|
||||
All other providers and metadata fields are passed through unchanged.
|
||||
The typed ``/model <name>`` path is unaffected -- only the interactive
|
||||
picker payload is narrowed.
|
||||
"""
|
||||
from hermes_cli.models import fetch_openrouter_models
|
||||
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
current_base_url=current_base_url,
|
||||
user_providers=user_providers,
|
||||
custom_providers=custom_providers,
|
||||
max_models=max_models,
|
||||
current_model=current_model,
|
||||
)
|
||||
|
||||
filtered: List[dict] = []
|
||||
for p in providers:
|
||||
slug = str(p.get("slug", "")).lower()
|
||||
if slug == "openrouter":
|
||||
try:
|
||||
live = fetch_openrouter_models()
|
||||
live_ids = [mid for mid, _ in live]
|
||||
except Exception:
|
||||
live_ids = list(p.get("models", []))
|
||||
p = dict(p)
|
||||
p["models"] = live_ids[:max_models]
|
||||
p["total_models"] = len(live_ids)
|
||||
|
||||
has_models = bool(p.get("models"))
|
||||
is_custom_endpoint = bool(p.get("is_user_defined")) and bool(p.get("api_url"))
|
||||
if not has_models and not is_custom_endpoint:
|
||||
continue
|
||||
filtered.append(p)
|
||||
|
||||
return filtered
|
||||
|
||||
+87
-10
@@ -40,6 +40,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("anthropic/claude-sonnet-4.5", ""),
|
||||
("anthropic/claude-haiku-4.5", ""),
|
||||
("openrouter/elephant-alpha", "free"),
|
||||
("openrouter/owl-alpha", "free"),
|
||||
("openai/gpt-5.5", ""),
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2.5-pro", ""),
|
||||
@@ -60,12 +61,14 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("z-ai/glm-5v-turbo", ""),
|
||||
("z-ai/glm-5-turbo", ""),
|
||||
("x-ai/grok-4.20", ""),
|
||||
("x-ai/grok-4.3", ""),
|
||||
("nvidia/nemotron-3-super-120b-a12b", ""),
|
||||
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
|
||||
("arcee-ai/trinity-large-preview:free", "free"),
|
||||
("arcee-ai/trinity-large-thinking", ""),
|
||||
("openai/gpt-5.5-pro", ""),
|
||||
("openai/gpt-5.4-nano", ""),
|
||||
("deepseek/deepseek-v4-pro", ""),
|
||||
]
|
||||
|
||||
_openrouter_catalog_cache: list[tuple[str, str]] | None = None
|
||||
@@ -180,10 +183,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"z-ai/glm-5v-turbo",
|
||||
"z-ai/glm-5-turbo",
|
||||
"x-ai/grok-4.20-beta",
|
||||
"x-ai/grok-4.3",
|
||||
"nvidia/nemotron-3-super-120b-a12b",
|
||||
"arcee-ai/trinity-large-thinking",
|
||||
"openai/gpt-5.5-pro",
|
||||
"openai/gpt-5.4-nano",
|
||||
"deepseek/deepseek-v4-pro",
|
||||
],
|
||||
# Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
|
||||
# provider_model_ids fallback when /v1/models is unavailable.
|
||||
@@ -773,7 +778,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
|
||||
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
|
||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
|
||||
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
|
||||
@@ -803,8 +807,28 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
|
||||
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
|
||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway"),
|
||||
]
|
||||
|
||||
# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
|
||||
# that is not already in the list above. Adding plugins/model-providers/<name>/
|
||||
# is sufficient to expose a new provider in the model picker, /model, and all
|
||||
# downstream consumers — no edits to this file needed.
|
||||
_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
|
||||
try:
|
||||
from providers import list_providers as _list_providers_for_canonical
|
||||
for _pp in _list_providers_for_canonical():
|
||||
if _pp.name in _canonical_slugs:
|
||||
continue
|
||||
if _pp.auth_type in ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"):
|
||||
continue # non-api-key flows need bespoke picker UX; skip auto-inject
|
||||
_label = _pp.display_name or _pp.name
|
||||
_desc = _pp.description or f"{_label} (direct API)"
|
||||
CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
|
||||
_canonical_slugs.add(_pp.name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Derived dicts — used throughout the codebase
|
||||
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
|
||||
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
|
||||
@@ -1739,10 +1763,20 @@ def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||
|
||||
|
||||
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
|
||||
"""Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
|
||||
"""Return True if the model is a Claude model eligible for Anthropic Fast Mode.
|
||||
|
||||
Fast mode is currently supported on Claude Opus 4.6 only. Per Anthropic's
|
||||
docs (https://platform.claude.com/docs/en/build-with-claude/fast-mode):
|
||||
"Fast mode is currently supported on Opus 4.6 only. Sending speed: fast
|
||||
with an unsupported model returns an error." Opus 4.7 explicitly rejects
|
||||
the ``speed`` parameter with HTTP 400.
|
||||
"""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
base = raw.split(":")[0]
|
||||
return base.startswith("claude-")
|
||||
if not base.startswith("claude-"):
|
||||
return False
|
||||
# Only Opus 4.6 supports fast mode at present.
|
||||
return "opus-4-6" in base or "opus-4.6" in base
|
||||
|
||||
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
@@ -2012,6 +2046,34 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
return ids
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Profile-based generic live fetch (all simple api-key providers) ──
|
||||
# Handles any provider registered in providers/ with auth_type="api_key".
|
||||
# Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
|
||||
try:
|
||||
from providers import get_provider_profile
|
||||
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||
|
||||
_p = get_provider_profile(normalized)
|
||||
if _p and _p.auth_type == "api_key" and _p.base_url:
|
||||
try:
|
||||
creds = resolve_api_key_provider_credentials(normalized)
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip()
|
||||
except Exception:
|
||||
api_key, base_url = "", _p.base_url
|
||||
if not base_url:
|
||||
base_url = _p.base_url
|
||||
if api_key:
|
||||
live = _p.fetch_models(api_key=api_key)
|
||||
if live:
|
||||
return live
|
||||
# Use profile's fallback_models if defined
|
||||
if _p.fallback_models:
|
||||
return list(_p.fallback_models)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
|
||||
if normalized in _MODELS_DEV_PREFERRED:
|
||||
return _merge_with_models_dev(normalized, curated_static)
|
||||
@@ -2895,6 +2957,19 @@ def fetch_api_models(
|
||||
_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _strip_ollama_cloud_suffix(model_id: str) -> str:
|
||||
"""Strip :cloud / -cloud suffixes that models.dev appends to Ollama Cloud IDs.
|
||||
|
||||
The live API uses clean IDs (e.g. 'kimi-k2.6') while models.dev sometimes
|
||||
returns them as 'kimi-k2.6:cloud'. Normalising before the dedup merge
|
||||
prevents duplicate entries in the merged model list.
|
||||
"""
|
||||
for suffix in (":cloud", "-cloud"):
|
||||
if model_id.endswith(suffix):
|
||||
return model_id[: -len(suffix)]
|
||||
return model_id
|
||||
|
||||
|
||||
def _ollama_cloud_cache_path() -> Path:
|
||||
"""Return the path for the Ollama Cloud model cache."""
|
||||
from hermes_constants import get_hermes_home
|
||||
@@ -2990,9 +3065,10 @@ def fetch_ollama_cloud_models(
|
||||
seen.add(m)
|
||||
merged.append(m)
|
||||
for m in mdev_models:
|
||||
if m and m not in seen:
|
||||
seen.add(m)
|
||||
merged.append(m)
|
||||
normalized = _strip_ollama_cloud_suffix(m)
|
||||
if normalized and normalized not in seen:
|
||||
seen.add(normalized)
|
||||
merged.append(normalized)
|
||||
if merged:
|
||||
_save_ollama_cloud_cache(merged)
|
||||
return merged
|
||||
@@ -3086,7 +3162,7 @@ def validate_requested_model(
|
||||
"message": f"Model `{requested}` was not found in LM Studio's model listing.",
|
||||
}
|
||||
|
||||
if normalized == "custom":
|
||||
if normalized == "custom" or normalized.startswith("custom:"):
|
||||
# Try probing with correct auth for the api_mode.
|
||||
if api_mode == "anthropic_messages":
|
||||
probe = probe_api_models(api_key, base_url, api_mode=api_mode)
|
||||
@@ -3184,11 +3260,12 @@ def validate_requested_model(
|
||||
if suggestions:
|
||||
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
|
||||
return {
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": (
|
||||
f"Model `{requested}` was not found in the OpenAI Codex model listing."
|
||||
f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
|
||||
"It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID."
|
||||
f"{suggestion_text}"
|
||||
),
|
||||
}
|
||||
|
||||
@@ -255,6 +255,10 @@ def get_nous_subscription_features(
|
||||
terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}
|
||||
|
||||
web_backend = str(web_cfg.get("backend") or "").strip().lower()
|
||||
# Per-capability overrides: if set, they determine which backend is active for
|
||||
# search/extract independently of web.backend.
|
||||
web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower()
|
||||
web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower()
|
||||
tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
|
||||
browser_provider_explicit = "cloud_provider" in browser_cfg
|
||||
browser_provider = normalize_browser_cloud_provider(
|
||||
@@ -280,6 +284,7 @@ def get_nous_subscription_features(
|
||||
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
|
||||
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
|
||||
direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
|
||||
direct_searxng = bool(get_env_value("SEARXNG_URL"))
|
||||
direct_fal = fal_key_is_configured()
|
||||
direct_openai_tts = bool(resolve_openai_audio_api_key())
|
||||
direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
|
||||
@@ -323,10 +328,18 @@ def get_nous_subscription_features(
|
||||
or (web_backend == "firecrawl" and direct_firecrawl)
|
||||
or (web_backend == "parallel" and direct_parallel)
|
||||
or (web_backend == "tavily" and direct_tavily)
|
||||
or (web_backend == "searxng" and direct_searxng)
|
||||
# Per-capability overrides: search_backend or extract_backend may be set
|
||||
# without web.backend (using the new split config from #20061)
|
||||
or (web_search_backend == "searxng" and direct_searxng)
|
||||
or (web_search_backend == "exa" and direct_exa)
|
||||
or (web_search_backend == "firecrawl" and direct_firecrawl)
|
||||
or (web_search_backend == "parallel" and direct_parallel)
|
||||
or (web_search_backend == "tavily" and direct_tavily)
|
||||
)
|
||||
)
|
||||
web_available = bool(
|
||||
managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily
|
||||
managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily or direct_searxng
|
||||
)
|
||||
|
||||
image_managed = image_tool_enabled and managed_image_available and not direct_fal
|
||||
@@ -412,8 +425,8 @@ def get_nous_subscription_features(
|
||||
managed_by_nous=web_managed,
|
||||
direct_override=web_active and not web_managed,
|
||||
toolset_enabled=web_tool_enabled,
|
||||
current_provider=web_backend or "",
|
||||
explicit_configured=bool(web_backend),
|
||||
current_provider=web_backend or web_search_backend or "",
|
||||
explicit_configured=bool(web_backend or web_search_backend),
|
||||
),
|
||||
"image_gen": NousFeatureState(
|
||||
key="image_gen",
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user