{"id":"https://openalex.org/W4416749753","doi":"https://doi.org/10.1109/iros60139.2025.11247360","title":"DriveLMM-o1: A Step-by-Step Reasoning Dataset and Large Multimodal Model for Driving Scenario Understanding","display_name":"DriveLMM-o1: A Step-by-Step Reasoning Dataset and Large Multimodal Model for Driving Scenario Understanding","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749753","doi":"https://doi.org/10.1109/iros60139.2025.11247360"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11247360","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247360","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ayesha Ishaq","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Ayesha Ishaq","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049890269","display_name":"Jean Lahoud","orcid":"https://orcid.org/0000-0003-0315-6484"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Jean Lahoud","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115002295","display_name":"Ketan More","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Ketan More","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074650489","display_name":"Omkar Thawakar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Omkar Thawakar","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115860474","display_name":"Ritesh Thawkar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Ritesh Thawkar","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108371642","display_name":"Dinura Dissanayake","orcid":"https://orcid.org/0009-0007-6339-1538"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Dinura Dissanayake","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114415249","display_name":"Noor Ahsan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Noor Ahsan","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071052221","display_name":"Yuhao Li","orcid":"https://orcid.org/0000-0002-9918-756X"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Yuhao Li","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100760570","display_name":"Fahad Shahbaz Khan","orcid":"https://orcid.org/0000-0002-4263-3143"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Fahad Shahbaz Khan","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009362997","display_name":"Hisham Cholakkal","orcid":"https://orcid.org/0000-0002-8230-9065"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Hisham Cholakkal","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087781064","display_name":"Ivan Laptev","orcid":"https://orcid.org/0000-0001-7072-3325"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Ivan Laptev","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022929869","display_name":"Rao Muhammad Anwer","orcid":"https://orcid.org/0000-0002-9041-2214"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Rao Muhammad Anwer","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000300751","display_name":"Salman Khan","orcid":"https://orcid.org/0000-0002-9502-1749"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Salman Khan","raw_affiliation_strings":["Mohamed Bin Zayed University of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mohamed Bin Zayed University of Artificial Intelligence","institution_ids":["https://openalex.org/I4210113480"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":13,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8699,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.88471601,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"20501","last_page":"20508"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0015999999595806003,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.0008999999845400453,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.7207000255584717},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7157999873161316},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.682200014591217},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5799999833106995},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5587999820709229},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5527999997138977},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.45570001006126404},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.4316999912261963},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.4074000120162964}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7677000164985657},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.7207000255584717},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7157999873161316},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.682200014591217},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6618000268936157},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5799999833106995},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5587999820709229},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5527999997138977},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5332000255584717},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.45570001006126404},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.4316999912261963},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.40049999952316284},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.39989998936653137},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.3815000057220459},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.3564999997615814},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C43971567","wikidata":"https://www.wikidata.org/wiki/Q3142865","display_name":"Logical reasoning","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.30869999527931213},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26510000228881836},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.26030001044273376}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11247360","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247360","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2885138528","https://openalex.org/W2962833140","https://openalex.org/W3035574168","https://openalex.org/W4389519119","https://openalex.org/W4391770672","https://openalex.org/W4393149666","https://openalex.org/W4401386967","https://openalex.org/W4401414574","https://openalex.org/W4402713111","https://openalex.org/W4402716252","https://openalex.org/W4403842289","https://openalex.org/W4403843558","https://openalex.org/W4404820176","https://openalex.org/W4409362112","https://openalex.org/W4412886751","https://openalex.org/W4412944664"],"related_works":[],"abstract_inverted_index":{"While":[0],"large":[1,168],"multimodal":[2,169],"models":[3],"(LMMs)":[4],"have":[5],"demonstrated":[6],"strong":[7],"performance":[8,180],"across":[9],"various":[10,189],"Visual":[11],"Question":[12],"Answering":[13],"(VQA)":[14],"tasks,":[15],"certain":[16],"challenges":[17],"require":[18],"complex":[19,182],"multi-step":[20],"reasoning":[21,78,97,120,155,176,201,223],"to":[22,116,156],"reach":[23],"accurate":[24,85],"answers.":[25],"One":[26],"particularly":[27],"challenging":[28],"task":[29],"is":[30,54,172],"autonomous":[31,122,161,204],"driving,":[32],"which":[33],"demands":[34],"thorough":[35],"cognitive":[36],"processing":[37],"before":[38],"decisions":[39],"can":[40],"be":[41],"made.":[42],"In":[43,185],"this":[44,104],"domain,":[45],"a":[46,91,109,167,210,219],"sequential":[47],"and":[48,60,112,135,149,191,234],"interpretive":[49],"understanding":[50],"of":[51,71,84],"visual":[52,119],"cues":[53],"essential":[55],"for":[56,94,121,203],"effective":[57],"perception,":[58,147],"prediction,":[59,148],"planning.":[61],"Nevertheless,":[62],"common":[63],"VQA":[64,129],"benchmarks":[65],"often":[66],"focus":[67],"on":[68,146,174,194],"the":[69,72,77,82,132,140,226],"accuracy":[70],"final":[73,214],"answer":[74,215],"while":[75],"overlooking":[76],"process":[79],"that":[80,171],"enables":[81],"generation":[83],"responses.":[86],"Moreover,":[87],"existing":[88],"methods":[89,193],"lack":[90],"comprehensive":[92],"framework":[93],"evaluating":[95],"step-by-step":[96,154],"in":[98,131,139,160,181,213,222],"realistic":[99],"driving":[100,162,183,205],"scenarios.":[101,163,184],"To":[102],"address":[103],"gap,":[105],"we":[106,187],"propose":[107],"DriveLMM-o1,":[108],"new":[110],"dataset":[111],"benchmark":[113,125,188],"specifically":[114],"designed":[115],"advance":[117],"step-wise":[118],"driving.":[123],"Our":[124,207,231],"features":[126],"over":[127,225],"18k":[128],"examples":[130],"training":[133],"set":[134],"more":[136],"than":[137],"4k":[138],"test":[141],"set,":[142],"covering":[143],"diverse":[144],"questions":[145],"planning,":[150],"each":[151],"enriched":[152],"with":[153,218],"ensure":[157],"logical":[158],"inference":[159],"We":[164],"further":[165],"introduce":[166],"model":[170,208,235],"fine-tuned":[173],"our":[175,195],"dataset,":[177,197,233],"demonstrating":[178],"robust":[179],"addition,":[186],"open-source":[190,229],"closed-source":[192],"proposed":[196],"systematically":[198],"comparing":[199],"their":[200],"capabilities":[202],"tasks.":[206],"achieves":[209],"+7.49%":[211],"gain":[212],"accuracy,":[216],"along":[217],"3.62%":[220],"improvement":[221],"score":[224],"previous":[227],"best":[228],"model.":[230],"framework,":[232],"are":[236],"available":[237],"at":[238],"https://github.com/ayesha-ishaq/DriveLMM-o1.":[239]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-28T00:00:00"}
