{"id":"https://openalex.org/W7161986152","doi":"https://doi.org/10.48550/arxiv.2605.21413","title":"Teaching AI Through Benchmark Construction: QuestBench as a Course-Based Practice for Accountable Knowledge Work","display_name":"Teaching AI Through Benchmark Construction: QuestBench as a Course-Based Practice for Accountable Knowledge Work","publication_year":2026,"publication_date":"2026-05-20","ids":{"openalex":"https://openalex.org/W7161986152","doi":"https://doi.org/10.48550/arxiv.2605.21413"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.21413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.21413","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136717817","display_name":"Haiyang Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Haiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136712999","display_name":"Jiuzheng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiuzheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136678597","display_name":"Taian Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Taian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104261481","display_name":"Mugeng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Mugeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136667989","display_name":"Wenchun Jing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing, Wenchun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044039411","display_name":"C PAN","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Chongyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136722877","display_name":"Siqi Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Siqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136686843","display_name":"Zhiyang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070313369","display_name":"Weichen Bi","orcid":"https://orcid.org/0000-0002-9465-910X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bi, Weichen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114731511","display_name":"Yudong Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Yudong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062486535","display_name":"Xiaoying Bai","orcid":"https://orcid.org/0000-0003-3989-4075"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Xiaoying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136614018","display_name":"Yun Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.13459999859333038,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.13459999859333038,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.09709999710321426,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.07840000092983246,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7698000073432922},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.6542999744415283},{"id":"https://openalex.org/keywords/artifact","display_name":"Artifact (error)","score":0.6281999945640564},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.5214999914169312},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3912999927997589},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.38370001316070557},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.3598000109195709}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7698000073432922},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.6542999744415283},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.6281999945640564},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6172999739646912},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.5214999914169312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4609000086784363},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3912999927997589},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.38370001316070557},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.36250001192092896},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.3598000109195709},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.35339999198913574},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C8795937","wikidata":"https://www.wikidata.org/wiki/Q11862829","display_name":"Discipline","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.28619998693466187},{"id":"https://openalex.org/C7991579","wikidata":"https://www.wikidata.org/wiki/Q17955","display_name":"Criticism","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C204983608","wikidata":"https://www.wikidata.org/wiki/Q2111958","display_name":"Productivity","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.21413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.21413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5750537514686584}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"AI":[1,34,46,66,100,220,229,257],"becomes":[2],"part":[3],"of":[4,78,132],"everyday":[5],"learning,":[6],"many":[7],"courses":[8],"teach":[9],"students":[10,42,109,212,250],"to":[11,20,44,112,119],"use":[12,27],"it":[13],"mainly":[14],"as":[15,74,218,224,234,239,256],"a":[16,38,61,75,113,122,174,235,240,245],"productivity":[17],"tool:":[18],"how":[19,186,249],"prompt,":[21],"search,":[22],"summarize,":[23],"write,":[24],"code,":[25],"and":[26,47,96,98,138,168,238,260],"tools":[28],"more":[29],"efficiently.":[30],"We":[31,231],"argue":[32],"that":[33,64,145,207],"education":[35],"also":[36],"needs":[37],"setting":[39,243],"in":[40,52,151],"which":[41],"learn":[43],"test":[45],"understand":[48],"their":[49],"own":[50],"role":[51],"judging":[53,228],"machine-produced":[54],"knowledge.":[55],"To":[56],"this":[57],"end,":[58],"we":[59],"introduce":[60],"course-based":[62],"practice":[63],"teaches":[65],"through":[67],"benchmark":[68,208,236],"construction,":[69],"using":[70],"deep":[71,153],"research":[72,154],"systems":[73,101],"concrete":[76],"example":[77],"AI-era":[79],"knowledge":[80,85,215,254],"work.":[81,262],"Students":[82],"turn":[83],"disciplinary":[84],"into":[86],"verifiable":[87],"expert-level":[88],"questions,":[89],"review":[90],"one":[91],"another's":[92],"designs":[93],"for":[94,227,244],"ambiguity":[95],"shortcuts,":[97],"evaluate":[99],"on":[102,142],"the":[103,160,169,193,225],"resulting":[104],"tasks.":[105],"This":[106],"activity":[107],"gives":[108],"direct":[110],"exposure":[111],"powerful":[114],"tool":[115],"while":[116],"asking":[117],"them":[118],"specify":[120],"what":[121],"trustworthy":[123],"answer":[124],"would":[125],"require.":[126],"The":[127,178,263],"produced":[128],"benchmark,":[129],"QuestBench,":[130],"consists":[131],"256":[133],"questions":[134],"across":[135,156],"14":[136],"humanities":[137],"social-science":[139],"domains.":[140],"Evaluation":[141],"QuestBench":[143,233],"shows":[144],"student-designed":[146],"tasks":[147],"reveal":[148],"hidden":[149],"failures":[150,179],"current":[152],"systems:":[155],"thirteen":[157],"evaluated":[158],"systems,":[159],"mean":[161],"question-level":[162],"pass":[163,176],"rate":[164],"is":[165,265],"only":[166,217],"16.85%,":[167],"best-performing":[170],"system,":[171],"GPT-5.5,":[172],"reaches":[173],"57.58%":[175],"rate.":[177],"are":[180],"educationally":[181],"useful":[182],"because":[183],"they":[184],"show":[185],"fluent,":[187],"source-backed":[188],"answers":[189],"can":[190,210,251],"still":[191],"miss":[192],"right":[194],"query,":[195],"source,":[196],"term,":[197],"or":[198],"evidence":[199],"standard.":[200],"Reflections":[201],"from":[202],"five":[203],"student":[204],"contributors":[205],"suggest":[206],"construction":[209],"help":[211],"see":[213],"professional":[214,261],"not":[216],"content":[219],"may":[221],"retrieve,":[222],"but":[223],"basis":[226],"outputs.":[230],"present":[232],"artifact":[237],"reusable":[241],"classroom":[242],"larger":[246],"educational":[247],"question:":[248],"remain":[252],"responsible":[253],"actors":[255],"enters":[258],"learning":[259],"dataset":[264],"available":[266],"at":[267],"https://huggingface.co/datasets/PKUAIWeb/QuestBench/tree/main.":[268]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-22T00:00:00"}
