{"id":"https://openalex.org/W4416749073","doi":"https://doi.org/10.1109/iros60139.2025.11247637","title":"CoCMT: Communication-Efficient Cross-Modal Transformer for Collaborative Perception","display_name":"CoCMT: Communication-Efficient Cross-Modal Transformer for Collaborative Perception","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749073","doi":"https://doi.org/10.1109/iros60139.2025.11247637"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11247637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073855975","display_name":"Rujia Wang","orcid":"https://orcid.org/0000-0003-4019-5327"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rujia Wang","raw_affiliation_strings":["Texas A&#x0026;M University,Computer Science and Engineering,College Station,TX,USA,77840"],"affiliations":[{"raw_affiliation_string":"Texas A&#x0026;M University,Computer Science and Engineering,College Station,TX,USA,77840","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003755037","display_name":"Xiangbo Gao","orcid":"https://orcid.org/0000-0001-7123-2675"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiangbo Gao","raw_affiliation_strings":["Texas A&#x0026;M University,Computer Science and Engineering,College Station,TX,USA,77840"],"affiliations":[{"raw_affiliation_string":"Texas A&#x0026;M University,Computer Science and Engineering,College Station,TX,USA,77840","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022145072","display_name":"Xiang Hao","orcid":"https://orcid.org/0000-0002-3931-6884"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100700826","display_name":"Runsheng Xu","orcid":"https://orcid.org/0000-0001-7375-9833"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Runsheng Xu","raw_affiliation_strings":["Texas A&#x0026;M University,Computer Science and Engineering,College Station,TX,USA,77840"],"affiliations":[{"raw_affiliation_string":"Texas A&#x0026;M University,Computer Science and Engineering,College Station,TX,USA,77840","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015173810","display_name":"Zhengzhong Tu","orcid":"https://orcid.org/0000-0002-7594-2292"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhengzhong Tu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5073855975"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":3.6402,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94001096,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2471","last_page":"2478"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.3499000072479248,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.3499000072479248,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.16750000417232513,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09130000323057175,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6453999876976013},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5012000203132629},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4489000141620636},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.37400001287460327},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3702000081539154},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.36500000953674316},{"id":"https://openalex.org/keywords/active-perception","display_name":"Active perception","score":0.3328999876976013}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7265999913215637},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6453999876976013},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5012000203132629},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4489000141620636},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.42179998755455017},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.36500000953674316},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3400000035762787},{"id":"https://openalex.org/C2776010242","wikidata":"https://www.wikidata.org/wiki/Q4677575","display_name":"Active perception","level":3,"score":0.3328999876976013},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.32600000500679016},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3197000026702881},{"id":"https://openalex.org/C554579003","wikidata":"https://www.wikidata.org/wiki/Q474157","display_name":"Collaborative software","level":2,"score":0.3158999979496002},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3010999858379364},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2759999930858612},{"id":"https://openalex.org/C101765175","wikidata":"https://www.wikidata.org/wiki/Q577764","display_name":"Communications system","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C158156997","wikidata":"https://www.wikidata.org/wiki/Q1416645","display_name":"Models of communication","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11247637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2982681137","https://openalex.org/W2996759437","https://openalex.org/W3027789438","https://openalex.org/W3096609285","https://openalex.org/W3109395584","https://openalex.org/W3109991383","https://openalex.org/W3110109289","https://openalex.org/W3131387676","https://openalex.org/W3201193904","https://openalex.org/W4225793049","https://openalex.org/W4306179760","https://openalex.org/W4312312588","https://openalex.org/W4312604822","https://openalex.org/W4312894406","https://openalex.org/W4319299982","https://openalex.org/W4383066393","https://openalex.org/W4383108597","https://openalex.org/W4386066469","https://openalex.org/W4390872473","https://openalex.org/W4390874111","https://openalex.org/W4390874213","https://openalex.org/W4390874598","https://openalex.org/W4391697114","https://openalex.org/W4392902736","https://openalex.org/W4401414525","https://openalex.org/W4401416097","https://openalex.org/W4402354044","https://openalex.org/W4402727636","https://openalex.org/W4404687984","https://openalex.org/W4405786382","https://openalex.org/W4408696576","https://openalex.org/W4409917551","https://openalex.org/W4414198593"],"related_works":[],"abstract_inverted_index":{"Multi-agent":[0],"collaborative":[1,37,165],"perception":[2,16,38,70,166],"enhances":[3],"each":[4],"agent\u2019s":[5],"perceptual":[6],"capabilities":[7],"by":[8,83,158],"sharing":[9],"sensing":[10],"information":[11],"to":[12,59,99,111,119],"cooperatively":[13],"perform":[14],"robot":[15],"tasks.":[17],"This":[18,160],"approach":[19],"has":[20],"proven":[21],"effective":[22],"in":[23,168],"addressing":[24],"challenges":[25],"such":[26,44],"as":[27,45],"sensor":[28],"deficiencies,":[29],"occlusions,":[30],"and":[31,86,105,126,177],"long-range":[32],"perception.":[33],"However,":[34],"existing":[35],"representative":[36],"systems":[39],"transmit":[40],"intermediate":[41],"feature":[42],"maps,":[43],"bird\u2019s-eye":[46],"view":[47],"(BEV)":[48],"representations,":[49],"which":[50],"contain":[51],"a":[52,107],"significant":[53],"amount":[54],"of":[55],"non-critical":[56],"information,":[57],"leading":[58,118],"high":[60],"communication":[61,66,81,137],"bandwidth":[62,82],"requirements.":[63],"To":[64],"enhance":[65,112],"efficiency":[67,161],"while":[68,134],"preserving":[69],"capability,":[71],"we":[72,92],"introduce":[73,93],"CoCMT,":[74,91],"an":[75],"object-query-based":[76],"collaboration":[77],"framework":[78],"that":[79],"optimizes":[80],"selectively":[84],"extracting":[85],"transmitting":[87],"essential":[88],"features.":[89],"Within":[90],"the":[94,113,182],"Efficient":[95],"Query":[96],"Transformer":[97],"(EQFormer)":[98],"effectively":[100],"fuse":[101],"multi-agent":[102],"object":[103,144],"queries":[104],"implement":[106],"synergistic":[108],"deep":[109],"supervision":[110],"positive":[114],"reinforcement":[115],"between":[116],"stages,":[117],"improved":[120],"overall":[121],"performance.":[122],"Experiments":[123],"on":[124],"OPV2V":[125],"V2V4Real":[127],"datasets":[128],"show":[129],"CoCMT":[130],"outperforms":[131],"state-of-the-art":[132],"methods":[133],"drastically":[135],"reducing":[136],"needs.":[138],"On":[139],"V2V4Real,":[140],"our":[141],"model":[142],"(Top-50":[143],"queries)":[145],"requires":[146],"only":[147],"0.416":[148],"Mb":[149],"bandwidth\u201483":[150],"times":[151],"less":[152],"than":[153],"SOTA":[154],"methods\u2014while":[155],"improving":[156],"AP@70":[157],"1.1%.":[159],"breakthrough":[162],"enables":[163],"practical":[164],"deployment":[167],"bandwidth-constrained":[169],"environments":[170],"without":[171],"sacrificing":[172],"detection":[173],"accuracy.":[174],"The":[175],"code":[176],"models":[178],"are":[179],"open-sourced":[180],"through":[181],"following":[183],"link:":[184],"https://github.com/taco-group/COCMT.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-28T00:00:00"}
