{"id":"https://openalex.org/W4408697157","doi":"https://doi.org/10.1109/itsc58415.2024.10919946","title":"ScenarioQA: Evaluating Test Scenario Reasoning Capabilities of Large Language Models","display_name":"ScenarioQA: Evaluating Test Scenario Reasoning Capabilities of Large Language Models","publication_year":2024,"publication_date":"2024-09-24","ids":{"openalex":"https://openalex.org/W4408697157","doi":"https://doi.org/10.1109/itsc58415.2024.10919946"},"language":"en","primary_location":{"id":"doi:10.1109/itsc58415.2024.10919946","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itsc58415.2024.10919946","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 27th International Conference on Intelligent Transportation Systems (ITSC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101136110","display_name":"Shreya Sinha","orcid":null},"institutions":[{"id":"https://openalex.org/I185103710","display_name":"University of California, Santa Cruz","ror":"https://ror.org/03s65by71","country_code":"US","type":"education","lineage":["https://openalex.org/I185103710"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shreya Sinha","raw_affiliation_strings":["University of California, Santa Cruz,Electrical and Computer Engineering Department,Santa Cruz,CA"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Cruz,Electrical and Computer Engineering Department,Santa Cruz,CA","institution_ids":["https://openalex.org/I185103710"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047505025","display_name":"Ishaan Paranjape","orcid":null},"institutions":[{"id":"https://openalex.org/I185103710","display_name":"University of California, Santa Cruz","ror":"https://ror.org/03s65by71","country_code":"US","type":"education","lineage":["https://openalex.org/I185103710"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ishaan Paranjape","raw_affiliation_strings":["University of California, Santa Cruz,Computational Media Department,Santa Cruz,CA"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Cruz,Computational Media Department,Santa Cruz,CA","institution_ids":["https://openalex.org/I185103710"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037589547","display_name":"Jim Whitehead","orcid":"https://orcid.org/0000-0002-6887-7330"},"institutions":[{"id":"https://openalex.org/I185103710","display_name":"University of California, Santa Cruz","ror":"https://ror.org/03s65by71","country_code":"US","type":"education","lineage":["https://openalex.org/I185103710"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jim Whitehead","raw_affiliation_strings":["University of California, Santa Cruz,Computational Media Department,Santa Cruz,CA"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Cruz,Computational Media Department,Santa Cruz,CA","institution_ids":["https://openalex.org/I185103710"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101136110"],"corresponding_institution_ids":["https://openalex.org/I185103710"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26365723,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"447","last_page":"453"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9696999788284302,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.9071999788284302,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7566156387329102},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.610917329788208},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4306044280529022},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41254308819770813},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.056430041790008545}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7566156387329102},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.610917329788208},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4306044280529022},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41254308819770813},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.056430041790008545},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/itsc58415.2024.10919946","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itsc58415.2024.10919946","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 27th International Conference on Intelligent Transportation Systems (ITSC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2588522864","https://openalex.org/W2963657083","https://openalex.org/W2971107062","https://openalex.org/W3024414633","https://openalex.org/W3127647470","https://openalex.org/W3153839026","https://openalex.org/W3155776565","https://openalex.org/W3176495666","https://openalex.org/W3197916734","https://openalex.org/W3199958362","https://openalex.org/W4205870266","https://openalex.org/W4220704768","https://openalex.org/W4389471004","https://openalex.org/W4392669753","https://openalex.org/W4401386967","https://openalex.org/W6631399359","https://openalex.org/W6755829550","https://openalex.org/W6801666428","https://openalex.org/W6838461927","https://openalex.org/W6843405348","https://openalex.org/W6851275496","https://openalex.org/W6853235341"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Autonomous":[0],"Vehicles":[1],"(AVs)":[2],"have":[3],"the":[4,38,74,94,98,106,155],"potential":[5],"of":[6,26,48,77,86,97,101,108,111,145,157,166],"reducing":[7],"car":[8],"accidents":[9],"and":[10,68,125,150,169],"increasing":[11],"accessibility":[12],"to":[13,17,28,41,73,82,147],"transportation.":[14],"AVs":[15,33,39],"need":[16,40],"be":[18,42],"rigorously":[19],"tested.":[20],"Scenario-based":[21],"testing":[22,110],"offers":[23],"a":[24,45,163],"set":[25],"approaches":[27,52],"design":[29],"high-risk":[30],"tests":[31],"for":[32,44,117,120,185],"at":[34],"low":[35],"cost.":[36],"Since":[37],"tested":[43],"large":[46],"number":[47],"scenarios,":[49],"automated":[50],"generation":[51,139],"are":[53,60],"needed.":[54],"Pre-trained":[55],"Large":[56],"Language":[57],"Models":[58],"(LLMs)":[59],"open-input,":[61],"general-purpose":[62],"data":[63],"generators":[64],"with":[65],"good":[66],"learning":[67],"reasoning":[69,99],"abilities.":[70,88,187],"However,":[71],"due":[72],"black-box":[75],"nature":[76],"these":[78,186],"systems,":[79],"it's":[80],"difficult":[81],"get":[83],"direct":[84],"evidence":[85],"their":[87],"In":[89,177],"this":[90,167],"paper,":[91],"we":[92,127,179],"address":[93],"open":[95],"question":[96],"capabilities":[100],"pre-trained":[102,183],"LLMs":[103,184],"specifically":[104,153],"in":[105,154],"context":[107,156],"scenario-based":[109,158],"AVs.":[112],"Inspired":[113],"by":[114],"QA":[115,138,175],"benchmarks":[116],"LLM":[118],"evaluations":[119],"commonsense":[121],"reasoning,":[122,124],"science":[123],"more,":[126],"present":[128],"our":[129],"main":[130],"contribution,":[131],"ScenarioQA.":[132],"This":[133],"benchmark":[134],"involves":[135],"an":[136,143],"LLM-based":[137],"process":[140,168],"based":[141],"on":[142],"integration":[144],"methods":[146],"generate":[148],"questions":[149],"corresponding":[151],"answers":[152],"testing.":[159],"We":[160],"carry":[161],"out":[162],"comprehensive":[164],"evaluation":[165],"gain":[170],"valuable":[171],"insights":[172],"regarding":[173],"effective":[174],"generation.":[176],"addition,":[178],"evaluate":[180],"several":[181],"available":[182]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
