{"id":"https://openalex.org/W7133322024","doi":"https://doi.org/10.48550/arxiv.2603.01630","title":"SEED-SET: Scalable Evolving Experimental Design for System-level Ethical Testing","display_name":"SEED-SET: Scalable Evolving Experimental Design for System-level Ethical Testing","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133322024","doi":"https://doi.org/10.48550/arxiv.2603.01630"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01630","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01630","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01630","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127973021","display_name":"Anjali Parashar","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Parashar, Anjali","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125197918","display_name":"Yingke Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yingke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012689023","display_name":"Eric Yang Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Eric Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127986338","display_name":"Fei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128020453","display_name":"James Neidhoefer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Neidhoefer, James","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005728773","display_name":"Devesh Upadhyay","orcid":"https://orcid.org/0000-0002-2399-1850"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Upadhyay, Devesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5121504364","display_name":"Chuchu Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Chuchu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5127973021"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.7716000080108643,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.7716000080108643,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1152999997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.019500000402331352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8299000263214111},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5748999714851379},{"id":"https://openalex.org/keywords/stakeholder","display_name":"Stakeholder","score":0.4708999991416931},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.4323999881744385},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.3276999890804291},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.3190999925136566},{"id":"https://openalex.org/keywords/bayesian-optimization","display_name":"Bayesian optimization","score":0.3174000084400177},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.31690001487731934}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8299000263214111},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6744999885559082},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5748999714851379},{"id":"https://openalex.org/C201305675","wikidata":"https://www.wikidata.org/wiki/Q852998","display_name":"Stakeholder","level":2,"score":0.4708999991416931},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4684000015258789},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45809999108314514},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.4323999881744385},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33399999141693115},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.3276999890804291},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3190999925136566},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31690001487731934},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.30550000071525574},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.2879999876022339},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2759000062942505},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.2606000006198883},{"id":"https://openalex.org/C80519477","wikidata":"https://www.wikidata.org/wiki/Q3532236","display_name":"Scenario testing","level":3,"score":0.25929999351501465},{"id":"https://openalex.org/C128942645","wikidata":"https://www.wikidata.org/wiki/Q1568346","display_name":"Test case","level":3,"score":0.257099986076355},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.25380000472068787},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01630","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01630","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01630","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01630","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7840033769607544,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"autonomous":[1,129],"systems":[2,43],"such":[3],"as":[4],"drones,":[5],"become":[6],"increasingly":[7],"deployed":[8],"in":[9,36,168],"high-stakes,":[10],"human-centric":[11],"domains,":[12],"it":[13],"is":[14,44],"critical":[15],"to":[16,23,29,47,103,138,157,163],"evaluate":[17],"the":[18,48,118,140],"ethical":[19,39,126],"alignment":[20],"since":[21],"failure":[22],"do":[24],"so":[25],"imposes":[26],"imminent":[27],"danger":[28],"human":[30],"lives,":[31],"and":[32,56,81,97,113,134,147,152],"long":[33],"term":[34],"bias":[35],"decision-making.":[37],"Automated":[38],"benchmarking":[40,127],"of":[41,50,128,170],"these":[42,66],"understudied":[45],"due":[46],"lack":[49],"ubiquitous,":[51],"well-defined":[52],"metrics":[53],"for":[54,125],"evaluation,":[55],"stakeholder-specific":[57],"subjectivity,":[58],"which":[59],"cannot":[60],"be":[61],"modeled":[62],"analytically.":[63],"To":[64],"address":[65],"challenges,":[67],"we":[68],"propose":[69,104],"SEED-SET,":[70],"a":[71,99],"Bayesian":[72],"experimental":[73],"design":[74],"framework":[75],"that":[76,115],"incorporates":[77],"domain-specific":[78],"objective":[79],"evaluations,":[80],"subjective":[82],"value":[83],"judgments":[84],"from":[85],"stakeholders.":[86],"SEED-SET":[87],"models":[88],"both":[89],"evaluation":[90],"types":[91],"separately":[92],"with":[93,117,165],"hierarchical":[94],"Gaussian":[95],"Processes,":[96],"uses":[98],"novel":[100],"acquisition":[101],"strategy":[102],"interesting":[105],"test":[106,160],"candidates":[107,161],"based":[108],"on":[109,131],"learnt":[110],"qualitative":[111],"preferences":[112],"objectives":[114],"align":[116],"stakeholder":[119],"preferences.":[120],"We":[121],"validate":[122],"our":[123,136],"approach":[124],"agents":[130],"two":[132],"applications":[133],"find":[135],"method":[137,143],"perform":[139],"best.":[141],"Our":[142],"provides":[144],"an":[145],"interpretable":[146],"efficient":[148],"trade-off":[149],"between":[150],"exploration":[151],"exploitation,":[153],"by":[154],"generating":[155],"up":[156],"$2\\times$":[158],"optimal":[159],"compared":[162],"baselines,":[164],"$1.25\\times$":[166],"improvement":[167],"coverage":[169],"high":[171],"dimensional":[172],"search":[173],"spaces.":[174]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
