{"id":"https://openalex.org/W4404179356","doi":"https://doi.org/10.1109/lsp.2024.3495557","title":"Empowering Corner Case Detection in Autonomous Vehicles With Multimodal Large Language Models","display_name":"Empowering Corner Case Detection in Autonomous Vehicles With Multimodal Large Language Models","publication_year":2024,"publication_date":"2024-11-08","ids":{"openalex":"https://openalex.org/W4404179356","doi":"https://doi.org/10.1109/lsp.2024.3495557"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2024.3495557","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2024.3495557","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100618133","display_name":"Tianqi Liu","orcid":"https://orcid.org/0000-0001-9407-231X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianqi Liu","raw_affiliation_strings":["Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064637838","display_name":"Yanjun Qin","orcid":"https://orcid.org/0000-0002-8020-493X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjun Qin","raw_affiliation_strings":["Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003507575","display_name":"Shanghang Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanghang Zhang","raw_affiliation_strings":["School of Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113424196","display_name":"Xiaoming Tao","orcid":"https://orcid.org/0009-0006-0951-2485"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming Tao","raw_affiliation_strings":["Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100618133"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.1757,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.89617473,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"51","last_page":"55"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9366000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9366000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6633774638175964},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4966757893562317},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.43862974643707275},{"id":"https://openalex.org/keywords/remotely-operated-underwater-vehicle","display_name":"Remotely operated underwater vehicle","score":0.414591908454895},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3389687240123749},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.29426783323287964},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.2592027187347412}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6633774638175964},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4966757893562317},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43862974643707275},{"id":"https://openalex.org/C145424490","wikidata":"https://www.wikidata.org/wiki/Q618465","display_name":"Remotely operated underwater vehicle","level":4,"score":0.414591908454895},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3389687240123749},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.29426783323287964},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2592027187347412}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2024.3495557","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2024.3495557","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W2150066425","https://openalex.org/W2489434015","https://openalex.org/W2910453440","https://openalex.org/W2963109634","https://openalex.org/W2963351448","https://openalex.org/W2988452521","https://openalex.org/W3035574168","https://openalex.org/W3109915642","https://openalex.org/W3120841571","https://openalex.org/W3135934332","https://openalex.org/W3138516171","https://openalex.org/W4221143499","https://openalex.org/W4246399668","https://openalex.org/W4312956471","https://openalex.org/W4379929801","https://openalex.org/W4391768728","https://openalex.org/W4394862623","https://openalex.org/W4402716047","https://openalex.org/W4402727922","https://openalex.org/W4404612908","https://openalex.org/W6791353385","https://openalex.org/W6797589674","https://openalex.org/W6851950068","https://openalex.org/W6854262950","https://openalex.org/W6858029246","https://openalex.org/W6859600322","https://openalex.org/W6862957787"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Object":[0],"detection":[1,41,128,154],"powered":[2],"by":[3,22],"deep":[4],"learning":[5,139],"is":[6,112],"an":[7,69,180,196],"essential":[8],"component":[9],"in":[10,61,185,217],"the":[11,17,37,56,86,98,107,120,126,134,145,153,213,218],"realm":[12],"of":[13,39,58,68,100,141,148,182,198,215,220],"self-driving":[14],"vehicles.":[15],"However,":[16],"model":[18,87],"may":[19],"be":[20],"affected":[21],"corner":[23,59,101,159],"cases,":[24],"which":[25],"are":[26],"rare":[27],"or":[28],"unusual":[29],"objects":[30],"and":[31,33,79,105,137,150,155,187,195],"scenarios,":[32],"can":[34,211],"significantly":[35],"impact":[36],"reliability":[38],"object":[40,127],"systems.":[42,64,229],"In":[43],"this":[44],"paper,":[45],"we":[46],"applied":[47],"a":[48,72,75,80],"Multimodal":[49],"Large":[50],"Language":[51],"Model":[52],"(MLLM)":[53],"to":[54,88,115,122,125,143,158,165,191,202,224],"address":[55],"challenge":[57],"cases":[60,160],"autonomous":[62,221],"driving":[63],"The":[65,130,167],"MLLM":[66,121,172],"consists":[67],"image":[70],"encoder,":[71],"text":[73,95,149],"tokenizer,":[74],"modal":[76],"alignment":[77],"layer,":[78],"pre-trained":[81],"large":[82],"language":[83],"model,":[84],"enabling":[85,152],"understand":[89],"multimodal":[90],"semantic":[91,146],"information.":[92],"We":[93,206],"added":[94],"descriptions":[96],"on":[97,119],"basis":[99],"case":[102],"dataset":[103,111],"CODA":[104],"constructed":[106],"CODA-REC":[108],"dataset.":[109],"This":[110],"then":[113],"used":[114],"perform":[116],"instruction":[117],"fine-tuning":[118],"adapt":[123],"it":[124],"task.":[129],"proposed":[131],"method":[132],"leverages":[133],"extensive":[135],"knowledge":[136],"zero-shot":[138],"capabilities":[140],"LLMs":[142],"enhance":[144],"understanding":[147],"images,":[151],"appropriate":[156],"response":[157],"that":[161,171,208],"were":[162],"previously":[163],"difficult":[164],"handle.":[166],"experimental":[168],"results":[169],"show":[170],"achieved":[173],"better":[174],"performance":[175],"than":[176],"baseline":[177],"models,":[178,194],"with":[179],"improvement":[181,197],"about":[183],"10%":[184,199],"mAR":[186],"mAP":[188,200],"metrics":[189],"compared":[190,201],"most":[192],"closed-set":[193],"open":[203],"set":[204],"models.":[205],"hope":[207],"our":[209],"work":[210],"inspire":[212],"application":[214],"MLLMs":[216],"field":[219],"driving,":[222],"contributing":[223],"more":[225],"advanced":[226],"intelligent":[227],"transportation":[228]},"counts_by_year":[{"year":2025,"cited_by_count":6}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
