{"id":"https://openalex.org/W7084073345","doi":"https://doi.org/10.1109/infocomwkshps65812.2025.11152896","title":"AVLLM-Based Multimodal Reasoning Segmentation and Detection Approach for Intelligent Driving","display_name":"AVLLM-Based Multimodal Reasoning Segmentation and Detection Approach for Intelligent Driving","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W7084073345","doi":"https://doi.org/10.1109/infocomwkshps65812.2025.11152896"},"language":"en","primary_location":{"id":"doi:10.1109/infocomwkshps65812.2025.11152896","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocomwkshps65812.2025.11152896","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2025 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tantan Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tantan Zhao","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Information &amp; Communications Engineering,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Information &amp; Communications Engineering,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Fan Li","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Li","raw_affiliation_strings":["Xi&#x2019;an Jiaotong University,School of Information &amp; Communications Engineering,China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Jiaotong University,School of Information &amp; Communications Engineering,China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zehua Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Zehua Wang","raw_affiliation_strings":["The University of British Columbia,Department of Electrical &amp; Computer Engineering,Canada"],"affiliations":[{"raw_affiliation_string":"The University of British Columbia,Department of Electrical &amp; Computer Engineering,Canada","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wei Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Chen","raw_affiliation_strings":["China University of Mining and Technology,School of Computer Science &amp; Technology,China"],"affiliations":[{"raw_affiliation_string":"China University of Mining and Technology,School of Computer Science &amp; Technology,China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xinyu Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Xinyu Huang","raw_affiliation_strings":["University of Waterloo,Department of Electrical &amp; Computer Engineering,Canada"],"affiliations":[{"raw_affiliation_string":"University of Waterloo,Department of Electrical &amp; Computer Engineering,Canada","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.57410116,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T14468","display_name":"Plant Ecology and Soil Science","score":0.10729999840259552,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14468","display_name":"Plant Ecology and Soil Science","score":0.10729999840259552,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13102","display_name":"Soil and Environmental Studies","score":0.07109999656677246,"subfield":{"id":"https://openalex.org/subfields/1111","display_name":"Soil Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12364","display_name":"Archaeological Research and Protection","score":0.06719999760389328,"subfield":{"id":"https://openalex.org/subfields/1912","display_name":"Space and Planetary Science"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7426999807357788},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5450000166893005},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4957999885082245},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.47440001368522644},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.4172999858856201},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.3732999861240387},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.3479999899864197},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.34200000762939453}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7487000226974487},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7426999807357788},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5996999740600586},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5450000166893005},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4957999885082245},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.47440001368522644},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43529999256134033},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.4172999858856201},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.3732999861240387},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37130001187324524},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.3479999899864197},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.34200000762939453},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.31929999589920044},{"id":"https://openalex.org/C47796450","wikidata":"https://www.wikidata.org/wiki/Q508378","display_name":"Intelligent transportation system","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2883000075817108},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.288100004196167},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.2847999930381775},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2815000116825104},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/infocomwkshps65812.2025.11152896","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocomwkshps65812.2025.11152896","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2025 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320329861","display_name":"Natural Science Foundation of Sichuan Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W3008115128","https://openalex.org/W3081168214","https://openalex.org/W4353004344","https://openalex.org/W4383220091","https://openalex.org/W4386065453","https://openalex.org/W4390874575","https://openalex.org/W4402670135","https://openalex.org/W4402754134","https://openalex.org/W4404612908"],"related_works":[],"abstract_inverted_index":{"Reasoning":[0,23],"segmentation":[1,44,127,151],"and":[2,9,25,45,58,76,97,115,124,128,147,152,160,168],"detection":[3,46,129,153],"are":[4,39,99],"important":[5],"to":[6,84,89,101,109],"realize":[7],"secure":[8],"reliable":[10],"intelligent":[11],"driving.":[12],"In":[13],"this":[14],"paper,":[15],"we":[16],"propose":[17],"an":[18],"Audio-Vision-Language":[19],"Large":[20],"Model-based":[21],"Multimodal":[22],"Segmentation":[24],"Detection":[26],"(AVLLM-MRSD)":[27],"approach,":[28],"where":[29],"audio":[30,74,87],"prompts":[31,34],"or":[32,37],"text":[33,91,95],"in":[35,134],"Chinese":[36],"English":[38],"allowed":[40],"when":[41],"executing":[42],"reasoning":[43,55,112,126,150],"tasks.":[47,130],"The":[48],"proposed":[49,120,141],"AVLLM-MRSD":[50,142],"approach":[51,143],"can":[52,144],"simultaneously":[53],"output":[54],"language":[56,78,113],"response":[57,114],"segmented-detected":[59,116],"image.":[60,117],"Specifically,":[61],"the":[62,73,86,90,94,102,111,119,140,165],"multimodal":[63,103],"audio-language":[64],"large":[65,77,105],"model":[66,79,106],"(ALLM)":[67],"module":[68,108],"which":[69],"is":[70,81],"composed":[71],"of":[72,154],"encoder":[75],"(LLM)":[80],"first":[82],"employed":[83],"convert":[85],"prompt":[88,96],"prompt.":[92],"Then,":[93],"image":[98],"input":[100],"vision-language":[104],"(VLLM)":[107],"obtain":[110],"Besides,":[118],"method":[121],"supports":[122],"multi-round":[123],"multi-object":[125],"Extensive":[131],"experimental":[132],"results":[133],"real":[135],"traffic":[136,161],"scenarios":[137],"show":[138],"that":[139],"more":[145],"efficiently":[146],"accurately":[148],"complete":[149],"various":[155],"vehicles,":[156],"pedestrians,":[157],"lanes,":[158],"barriers,":[159],"lights":[162],"compared":[163],"with":[164],"existing":[166],"LISA":[167],"Grounded":[169],"SAM":[170],"methods.":[171]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
