{"id":"https://openalex.org/W7131435915","doi":"https://doi.org/10.1109/jiot.2026.3668085","title":"Multimodal Large Language Models for Perception in Autonomous Driving: Architecture, Taxonomy, and Challenges","display_name":"Multimodal Large Language Models for Perception in Autonomous Driving: Architecture, Taxonomy, and Challenges","publication_year":2026,"publication_date":"2026-02-25","ids":{"openalex":"https://openalex.org/W7131435915","doi":"https://doi.org/10.1109/jiot.2026.3668085"},"language":null,"primary_location":{"id":"doi:10.1109/jiot.2026.3668085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2026.3668085","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101577987","display_name":"Ying Jing","orcid":"https://orcid.org/0000-0003-1901-8967"},"institutions":[{"id":"https://openalex.org/I4210145005","display_name":"State Key Laboratory of Vehicle NVH and Safety Technology","ror":"https://ror.org/04e6h1p91","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210145005"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Jing","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1901-8967","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I4210145005"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xinyu Zhang","orcid":"https://orcid.org/0000-0003-0034-9037"},"institutions":[{"id":"https://openalex.org/I4210145005","display_name":"State Key Laboratory of Vehicle NVH and Safety Technology","ror":"https://ror.org/04e6h1p91","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210145005"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Zhang","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0034-9037","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I4210145005"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mo Zhou","orcid":"https://orcid.org/0009-0004-2192-8673"},"institutions":[{"id":"https://openalex.org/I4210145005","display_name":"State Key Laboratory of Vehicle NVH and Safety Technology","ror":"https://ror.org/04e6h1p91","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210145005"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mo Zhou","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-2192-8673","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I4210145005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009369111","display_name":"Yuchuan Ji","orcid":"https://orcid.org/0000-0002-4640-6685"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchuan Ji","raw_affiliation_strings":["School of Rail Transportation, Soochow University, Suzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Rail Transportation, Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058811227","display_name":"Yanchao Ding","orcid":"https://orcid.org/0000-0003-1803-1713"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanchao Ding","raw_affiliation_strings":["School of Transportation Science and Engineering, Beihang University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Transportation Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050267276","display_name":"Jialun Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145005","display_name":"State Key Laboratory of Vehicle NVH and Safety Technology","ror":"https://ror.org/04e6h1p91","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210145005"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jialun Yin","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I4210145005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126786843","display_name":"Ruizhi Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I96673099","display_name":"Technical University of Denmark","ror":"https://ror.org/04qtj9h94","country_code":"DK","type":"education","lineage":["https://openalex.org/I96673099"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Ruizhi Jia","raw_affiliation_strings":["Department of Electrical and Photonics Engineering, Technical University of Denmark, Kongens Lyngby, Denmark"],"raw_orcid":"https://orcid.org/0009-0008-5692-3310","affiliations":[{"raw_affiliation_string":"Department of Electrical and Photonics Engineering, Technical University of Denmark, Kongens Lyngby, Denmark","institution_ids":["https://openalex.org/I96673099"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068805137","display_name":"Yijin Xiong","orcid":"https://orcid.org/0000-0002-9733-6875"},"institutions":[{"id":"https://openalex.org/I4210145005","display_name":"State Key Laboratory of Vehicle NVH and Safety Technology","ror":"https://ror.org/04e6h1p91","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210145005"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijin Xiong","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I4210145005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126791933","display_name":"Kun Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210125878","display_name":"Suzhou Research Institute","ror":"https://ror.org/03ebk0c60","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210125878"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Zhao","raw_affiliation_strings":["Suzhou Automobile Research Institute, Tsinghua University, Suzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Suzhou Automobile Research Institute, Tsinghua University, Suzhou, China","institution_ids":["https://openalex.org/I4210125878","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100404947","display_name":"Jie Yang","orcid":"https://orcid.org/0000-0003-4801-7162"},"institutions":[{"id":"https://openalex.org/I4210125878","display_name":"Suzhou Research Institute","ror":"https://ror.org/03ebk0c60","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210125878"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yang","raw_affiliation_strings":["Suzhou Automobile Research Institute, Tsinghua University, Suzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Suzhou Automobile Research Institute, Tsinghua University, Suzhou, China","institution_ids":["https://openalex.org/I4210125878","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jun Li","orcid":"https://orcid.org/0000-0002-0437-5112"},"institutions":[{"id":"https://openalex.org/I4210145005","display_name":"State Key Laboratory of Vehicle NVH and Safety Technology","ror":"https://ror.org/04e6h1p91","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210145005"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Li","raw_affiliation_strings":["State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0437-5112","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Intelligent Green Vehicle and Mobility and School of Vehicle and Mobility,, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I4210145005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041101317","display_name":"Huaping Liu","orcid":"https://orcid.org/0000-0002-4042-6044"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaping Liu","raw_affiliation_strings":["Department of Computer Science and Technology, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-4042-6044","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, State Key Laboratory of Intelligent Technology and Systems, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23984688,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":"10","first_page":"20375","last_page":"20397"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8708999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8708999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.019500000402331352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.01119999960064888,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6951000094413757},{"id":"https://openalex.org/keywords/obstacle","display_name":"Obstacle","score":0.6098999977111816},{"id":"https://openalex.org/keywords/mainstream","display_name":"Mainstream","score":0.5428000092506409},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4429999887943268},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3799000084400177},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.37470000982284546}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7627999782562256},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6951000094413757},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.6098999977111816},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.5428000092506409},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5245000123977661},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4429999887943268},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40630000829696655},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3799000084400177},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.37470000982284546},{"id":"https://openalex.org/C2777362162","wikidata":"https://www.wikidata.org/wiki/Q5594431","display_name":"Grand Challenges","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3253999948501587},{"id":"https://openalex.org/C207267971","wikidata":"https://www.wikidata.org/wiki/Q120208","display_name":"Emerging technologies","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27790001034736633},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.272599995136261},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25690001249313354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jiot.2026.3668085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2026.3668085","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1095835165","display_name":null,"funder_award_id":"52221005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6313768182","display_name":null,"funder_award_id":"62273198","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Autonomous":[0],"vehicles":[1],"rely":[2],"on":[3,110,170],"continuous":[4],"environmental":[5,94],"perception":[6,17,95,115,136],"to":[7,11,62,81,114,201],"assess":[8],"obstacle":[9],"distribution":[10],"ensure":[12],"safe":[13],"driving.":[14],"However,":[15],"current":[16,167],"technologies":[18],"face":[19],"substantial":[20],"challenges,":[21],"particularly":[22],"under":[23],"adverse":[24],"weather":[25],"conditions":[26],"and":[27,89,142,163,177,191],"when":[28],"encountering":[29],"long-tail":[30],"scenarios.":[31],"With":[32],"the":[33,63,133,151,188],"advent":[34],"of":[35,65,124,156,166],"transformer-based":[36],"attention":[37],"mechanisms,":[38],"large":[39,67],"language":[40,68],"models":[41,69,76],"(LLMs),":[42],"exemplified":[43],"by":[44,207],"GPT,":[45],"have":[46],"exhibited":[47],"emergent":[48],"intelligence,":[49],"offering":[50],"new":[51],"possibilities":[52],"for":[53],"achieving":[54],"high-performance":[55],"perception.":[56,129,197],"This":[57,198],"technological":[58],"advancement":[59],"has":[60],"led":[61],"development":[64],"multi-modal":[66,73],"(MLLMs),":[70],"which":[71],"incorporate":[72],"encoders.":[74],"These":[75],"enable":[77],"a":[78,104,121,161],"single":[79],"LLM":[80],"process":[82],"multi-source":[83],"data":[84],"while":[85],"performing":[86],"advanced":[87],"understanding":[88],"reasoning":[90],"tasks,":[91,137],"enhancing":[92],"complex":[93],"capabilities.":[96],"Despite":[97],"significant":[98],"progress":[99,204],"in":[100,107,127,195,205],"MLLMs,":[101,168],"there":[102],"remains":[103],"notable":[105],"gap":[106],"systematic":[108],"research":[109,193],"their":[111,181],"optimal":[112],"application":[113],"tasks.":[116],"Therefore,":[117],"this":[118],"paper":[119],"presents":[120],"comprehensive":[122],"survey":[123,199],"recent":[125],"advancements":[126],"MLLM-based":[128],"First,":[130],"we":[131,149,159,186],"introduce":[132],"mainstream":[134],"vision\u2013language":[135],"widely":[138],"adopted":[139],"evaluation":[140],"metrics,":[141],"existing":[143],"language-enhanced":[144],"autonomous":[145],"driving":[146],"datasets.":[147],"Next,":[148],"outline":[150],"general":[152],"architectural":[153],"design":[154],"principles":[155],"MLLMs.":[157],"Subsequently,":[158],"provide":[160],"taxonomy":[162],"indepth":[164],"analysis":[165],"focusing":[169],"three":[171],"dimensions:":[172],"input":[173],"modality,":[174],"alignment":[175],"technique,":[176],"scene":[178],"representation,":[179],"elucidating":[180],"underlying":[182],"implementation":[183],"paradigms.":[184],"Finally,":[185],"summarize":[187],"key":[189],"challenges":[190],"emerging":[192],"directions":[194],"MLLM-driven":[196],"aims":[200],"facilitate":[202],"further":[203],"MLLMs":[206],"synthesizing":[208],"these":[209],"insights.":[210]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-26T00:00:00"}
