{"id":"https://openalex.org/W4413018539","doi":"https://doi.org/10.1109/iv64158.2025.11097781","title":"BEV-LLM: Leveraging Multimodal BEV Maps for Scene Captioning in Autonomous Driving","display_name":"BEV-LLM: Leveraging Multimodal BEV Maps for Scene Captioning in Autonomous Driving","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4413018539","doi":"https://doi.org/10.1109/iv64158.2025.11097781"},"language":"en","primary_location":{"id":"doi:10.1109/iv64158.2025.11097781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iv64158.2025.11097781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Intelligent Vehicles Symposium (IV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119232431","display_name":"Felix Brandst\u00e4tter","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105785","display_name":"Intelligent Systems Research (United States)","ror":"https://ror.org/01reevc91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105785"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Felix Brandst\u00e4tter","raw_affiliation_strings":["Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335"],"affiliations":[{"raw_affiliation_string":"Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335","institution_ids":["https://openalex.org/I4210105785"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111015308","display_name":"Erik Sch\u00fctz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105785","display_name":"Intelligent Systems Research (United States)","ror":"https://ror.org/01reevc91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105785"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Erik Sch\u00fctz","raw_affiliation_strings":["Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335"],"affiliations":[{"raw_affiliation_string":"Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335","institution_ids":["https://openalex.org/I4210105785"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056930219","display_name":"Katharina Winter","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105785","display_name":"Intelligent Systems Research (United States)","ror":"https://ror.org/01reevc91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105785"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Katharina Winter","raw_affiliation_strings":["Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335"],"affiliations":[{"raw_affiliation_string":"Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335","institution_ids":["https://openalex.org/I4210105785"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007686963","display_name":"Fabian B. Flohr","orcid":"https://orcid.org/0000-0002-1499-3790"},"institutions":[{"id":"https://openalex.org/I4210105785","display_name":"Intelligent Systems Research (United States)","ror":"https://ror.org/01reevc91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105785"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fabian B. Flohr","raw_affiliation_strings":["Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335"],"affiliations":[{"raw_affiliation_string":"Munich University of Applied Sciences,Intelligent Vehicles Lab (IVL),Munich,Germany,80335","institution_ids":["https://openalex.org/I4210105785"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5119232431"],"corresponding_institution_ids":["https://openalex.org/I4210105785"],"apc_list":null,"apc_paid":null,"fwci":1.2512,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82347557,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"345","last_page":"350"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9503741264343262},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7653511166572571},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.49090057611465454},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4572271406650543},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4386121332645416},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.14573320746421814}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9503741264343262},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7653511166572571},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.49090057611465454},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4572271406650543},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4386121332645416},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.14573320746421814}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iv64158.2025.11097781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iv64158.2025.11097781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Intelligent Vehicles Symposium (IV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2963341956","https://openalex.org/W3035574168","https://openalex.org/W3209355071","https://openalex.org/W4319300501","https://openalex.org/W4361229539","https://openalex.org/W4366850747","https://openalex.org/W4383066393","https://openalex.org/W4385245566","https://openalex.org/W4391508113","https://openalex.org/W4391770672","https://openalex.org/W4391949040","https://openalex.org/W4393084815","https://openalex.org/W4402727764","https://openalex.org/W4402952251","https://openalex.org/W4404341829","https://openalex.org/W4404350186","https://openalex.org/W4404820176","https://openalex.org/W4409366009","https://openalex.org/W4409366820","https://openalex.org/W6739901393","https://openalex.org/W6761205521","https://openalex.org/W6796581206","https://openalex.org/W6849177959","https://openalex.org/W6851948999","https://openalex.org/W6851950068","https://openalex.org/W6859738421","https://openalex.org/W6861312861","https://openalex.org/W6874071591","https://openalex.org/W6874157863"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Autonomous":[0],"driving":[1,32,56,133],"technology":[2],"has":[3],"the":[4,15,31,93],"potential":[5],"to":[6,61,100,126],"transform":[7],"transportation,":[8],"but":[9],"its":[10],"wide":[11],"adoption":[12],"depends":[13],"on":[14,92,114,122],"development":[16],"of":[17,30,54],"interpretable":[18],"and":[19,42,67,117,119,135],"transparent":[20],"decision-making":[21],"systems.":[22],"Scene":[23],"captioning,":[24],"which":[25],"generates":[26],"natural":[27],"language":[28],"descriptions":[29],"environment,":[33],"plays":[34],"a":[35,48,71,82],"crucial":[36],"role":[37],"in":[38,102,138],"enhancing":[39],"transparency,":[40],"safety,":[41],"human-AI":[43],"interaction.":[44],"We":[45],"introduce":[46],"BEV-LLM,":[47],"lightweight":[49],"model":[50],"for":[51,76],"3D":[52,63],"captioning":[53,130],"autonomous":[55],"scenes.":[57],"BEV-LLM":[58,88],"leverages":[59],"BEVFusion":[60],"combine":[62],"LiDAR":[64],"point":[65],"clouds":[66],"multi-view":[68],"images,":[69],"incorporating":[70],"novel":[72],"absolute":[73],"positional":[74],"encoding":[75],"view-specific":[77],"scene":[78,129],"descriptions.":[79],"Despite":[80],"using":[81],"small":[83],"1B":[84],"parameter":[85],"base":[86],"model,":[87],"achieves":[89],"competitive":[90],"performance":[91],"nuCaption":[94],"dataset,":[95],"surpassing":[96],"state-of-the-art":[97],"by":[98],"up":[99],"5%":[101],"BLEU":[103],"scores.":[104],"Additionally,":[105],"we":[106],"release":[107],"two":[108],"new":[109],"datasets":[110],"\u2014":[111,125],"nu-View":[112],"(focused":[113,121],"environmental":[115],"conditions":[116],"viewpoints)":[118],"GroundView":[120],"object":[123],"grounding)":[124],"better":[127],"assess":[128],"across":[131],"diverse":[132],"scenarios":[134],"address":[136],"gaps":[137],"current":[139],"benchmarks,":[140],"along":[141],"with":[142],"initial":[143],"benchmarking":[144],"results":[145],"demonstrating":[146],"their":[147],"effectiveness.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
