{"id":"https://openalex.org/W7159645015","doi":"https://doi.org/10.48550/arxiv.2604.27093","title":"Useless but Safe? Benchmarking Utility Recovery with User Intent Clarification in Multi-Turn Conversations","display_name":"Useless but Safe? Benchmarking Utility Recovery with User Intent Clarification in Multi-Turn Conversations","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7159645015","doi":"https://doi.org/10.48550/arxiv.2604.27093"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27093","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024099895","display_name":"Mingqian Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Mingqian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134964229","display_name":"Malia Morgan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morgan, Malia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134972602","display_name":"Liwei Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Liwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134941581","display_name":"Carolyn Rose","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rose, Carolyn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134971139","display_name":"Maarten Sap","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sap, Maarten","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.527999997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.527999997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.08139999955892563,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03689999878406525,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5898000001907349},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5530999898910522},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.5479000210762024},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5084999799728394},{"id":"https://openalex.org/keywords/helpfulness","display_name":"Helpfulness","score":0.4449000060558319},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.3953999876976013},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.34529998898506165},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3197000026702881}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7203999757766724},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5898000001907349},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5530999898910522},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.5479000210762024},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5084999799728394},{"id":"https://openalex.org/C2781265381","wikidata":"https://www.wikidata.org/wiki/Q5710255","display_name":"Helpfulness","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.3653999865055084},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.34529998898506165},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3197000026702881},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.31380000710487366},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.31360000371932983},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.31279999017715454},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.3073999881744385},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C2778414658","wikidata":"https://www.wikidata.org/wiki/Q1409206","display_name":"Model risk","level":3,"score":0.29499998688697815},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C168725872","wikidata":"https://www.wikidata.org/wiki/Q991663","display_name":"Sophistication","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C67712803","wikidata":"https://www.wikidata.org/wiki/Q7901853","display_name":"User modeling","level":3,"score":0.2906999886035919},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2809999883174896},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26589998602867126},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.2628999948501587}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6826040148735046,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"LLM":[1],"safety":[2,201],"alignment":[3],"techniques":[4],"improve":[5],"model":[6,92,105,188,197,208,229,243],"robustness":[7],"against":[8],"adversarial":[9],"attacks,":[10],"but":[11],"overlook":[12],"whether":[13,35,241],"and":[14,44,79,203],"how":[15,102,226],"LLMs":[16,36],"can":[17,37],"recover":[18,45],"helpfulness":[19],"when":[20],"benign":[21,60,110,126,135,154],"users":[22],"clarify":[23],"their":[24,39],"intent.":[25,253],"We":[26,94,175],"introduce":[27],"CarryOnBench,":[28],"the":[29,108,124,130,134,228],"first":[30],"interactive":[31],"benchmark":[32],"that":[33,100,142,236],"measures":[34],"revise":[38],"interpretation":[40],"of":[41,87,123,160,225],"user":[42,69,252],"intent":[43,136,148],"utility,":[46],"while":[47],"remaining":[48],"safe":[49],"through":[50],"multi-turn":[51,157],"conversations.":[52],"Starting":[53],"from":[54],"398":[55],"seemingly":[56],"harmful":[57],"queries":[58],"with":[59],"underlying":[61],"intents,":[62],"we":[63],"simulate":[64],"5,970":[65],"conversations":[66,218],"by":[67],"varying":[68],"follow-up":[70],"sequences,":[71],"evaluating":[72],"14":[73,161],"models":[74,119,138,143,162],"on":[75],"both":[76],"intent-aligned":[77],"utility":[78,184],"safety.":[80],"CarryOnBench":[81],"yields":[82],"1,866":[83],"different":[84],"conversation":[85],"flows":[86],"4--12":[88],"turns,":[89],"totaling":[90],"23,880":[91],"responses.":[93],"design":[95],"Ben-Util,":[96],"a":[97,187,196,207,234,242],"checklist-based":[98],"metric":[99],"evaluates":[101],"well":[103],"each":[104],"response":[106],"fulfills":[107],"user's":[109,125],"information":[111,127,145],"need":[112],"using":[113],"atomic":[114],"items.":[115],"At":[116],"turn":[117],"one,":[118],"fulfill":[120,139],"only":[121],"10.5--37.6%":[122],"need.":[128],"When":[129],"same":[131],"query":[132],"includes":[133],"upfront,":[137],"25.1--72.1%,":[140],"confirming":[141],"withhold":[144],"due":[146],"to":[147,181,220,250],"misinterpretation,":[149],"not":[150],"limited":[151],"knowledge.":[152],"With":[153],"clarifications":[155],"in":[156],"conversations,":[158],"13":[159],"approach":[163],"or":[164,247],"exceed":[165],"this":[166],"single-turn":[167,182,237],"baseline,":[168],"yet":[169],"recovery":[170],"cost":[171],"varies":[172],"across":[173],"models.":[174],"identify":[176],"three":[177],"failure":[178],"modes":[179],"invisible":[180],"evaluations:":[183],"lock-in,":[185],"where":[186,195,206],"rarely":[189],"updates":[190,198],"despite":[191],"clarification;":[192],"unsafe":[193],"recovery,":[194,205],"at":[199],"disproportionate":[200],"cost;":[202],"repetitive":[204],"recycles":[209],"prior":[210],"responses":[211],"rather":[212],"than":[213],"providing":[214],"new":[215],"information.":[216],"Moreover,":[217],"converge":[219],"similar":[221],"harmfulness":[222],"levels":[223],"regardless":[224],"conservative":[227],"starts.":[230],"These":[231],"findings":[232],"expose":[233],"gap":[235],"evaluations":[238],"miss":[239],"--":[240],"is":[244],"appropriately":[245],"cautious":[246],"simply":[248],"unresponsive":[249],"clarified":[251]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-02T00:00:00"}
