{"id":"https://openalex.org/W7133325107","doi":"https://doi.org/10.48550/arxiv.2603.00063","title":"Measuring What AI Systems Might Do: Towards A Measurement Science in AI","display_name":"Measuring What AI Systems Might Do: Towards A Measurement Science in AI","publication_year":2026,"publication_date":"2026-02-10","ids":{"openalex":"https://openalex.org/W7133325107","doi":"https://doi.org/10.48550/arxiv.2603.00063"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00063","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00063","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00063","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127957678","display_name":"Konstantinos Voudouris","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Voudouris, Konstantinos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021025575","display_name":"Mirko Thalmann","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thalmann, Mirko","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127997236","display_name":"Alex Kipnis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kipnis, Alex","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127961789","display_name":"Jos\u00e9 Hern\u00e1ndez-Orallo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hern\u00e1ndez-Orallo, Jos\u00e9","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125198117","display_name":"Eric Schulz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schulz, Eric","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5127957678"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.5651000142097473,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.5651000142097473,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.13369999825954437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14350","display_name":"Innovation, Sustainability, Human-Machine Systems","score":0.0471000000834465,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.8615999817848206},{"id":"https://openalex.org/keywords/conflation","display_name":"Conflation","score":0.6553000211715698},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.513700008392334},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.4584999978542328},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.44519999623298645},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4092999994754791},{"id":"https://openalex.org/keywords/counterfactual-conditional","display_name":"Counterfactual conditional","score":0.34529998898506165},{"id":"https://openalex.org/keywords/argument","display_name":"Argument (complex analysis)","score":0.30379998683929443}],"concepts":[{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.8615999817848206},{"id":"https://openalex.org/C130440534","wikidata":"https://www.wikidata.org/wiki/Q14946528","display_name":"Conflation","level":2,"score":0.6553000211715698},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5259000062942505},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5174000263214111},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.513700008392334},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.4584999978542328},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.4438000023365021},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.37860000133514404},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.35830000042915344},{"id":"https://openalex.org/C71889745","wikidata":"https://www.wikidata.org/wiki/Q1783264","display_name":"Counterfactual conditional","level":3,"score":0.34529998898506165},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.3230000138282776},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3199000060558319},{"id":"https://openalex.org/C98184364","wikidata":"https://www.wikidata.org/wiki/Q1780131","display_name":"Argument (complex analysis)","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C110099512","wikidata":"https://www.wikidata.org/wiki/Q59115","display_name":"Philosophy of science","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C22467394","wikidata":"https://www.wikidata.org/wiki/Q849359","display_name":"Multidisciplinary approach","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C29554801","wikidata":"https://www.wikidata.org/wiki/Q147027","display_name":"Thought experiment","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.2612000107765198},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26109999418258667},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.260699987411499},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2542000114917755},{"id":"https://openalex.org/C30772137","wikidata":"https://www.wikidata.org/wiki/Q5164762","display_name":"Consumption (sociology)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C171606756","wikidata":"https://www.wikidata.org/wiki/Q506132","display_name":"Psychometrics","level":2,"score":0.25209999084472656},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00063","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00063","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00063","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00063","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scientists,":[0],"policy-makers,":[1],"business":[2],"leaders,":[3],"and":[4,28,34,55,72,90,94,139,150,163],"members":[5],"of":[6,63,106,135,147],"the":[7,104,107],"public":[8],"care":[9],"about":[10],"what":[11,45,165],"modern":[12],"artificial":[13],"intelligence":[14],"systems":[15,64],"are":[16,30,57,84],"disposed":[17],"to":[18,49,111,117,160],"do.":[19],"Yet":[20],"terms":[21],"such":[22,121],"as":[23,122,152],"capabilities,":[24],"propensities,":[25],"skills,":[26],"values,":[27],"abilities":[29],"routinely":[31],"used":[32],"interchangeably":[33],"conflated":[35],"with":[36,39],"observable":[37],"performance,":[38],"AI":[40,112,148,169],"evaluation":[41,157,170],"practices":[42,158],"rarely":[43],"specifying":[44],"quantity":[46],"they":[47],"purport":[48],"measure.":[50],"We":[51],"argue":[52],"that":[53],"capabilities":[54,149],"propensities":[56,151],"dispositional":[58],"properties":[59,83,102],"-":[60],"stable":[61],"features":[62],"characterised":[65],"by":[66],"counterfactual":[67],"relationships":[68],"between":[69],"contextual":[70,82],"conditions":[71],"behavioural":[73],"outputs.":[74],"Measuring":[75],"a":[76,144],"disposition":[77],"requires":[78],"(i)":[79],"hypothesising":[80],"which":[81],"causally":[85],"relevant,":[86],"(ii)":[87],"independently":[88],"operationalising":[89],"measuring":[91],"those":[92,101],"properties,":[93],"(iii)":[95],"empirically":[96],"mapping":[97],"how":[98],"variation":[99],"in":[100],"affects":[103],"probability":[105],"behaviour.":[108],"Dominant":[109],"approaches":[110],"evaluation,":[113],"from":[114,133],"benchmark":[115],"averages":[116],"data-driven":[118],"latent-variable":[119],"models":[120],"Item":[123],"Response":[124],"Theory,":[125],"bypass":[126],"these":[127],"steps":[128],"entirely.":[129],"Building":[130],"on":[131],"ideas":[132],"philosophy":[134],"science,":[136,141],"measurement":[137],"theory,":[138],"cognitive":[140],"we":[142],"develop":[143],"principled":[145],"account":[146],"dispositions,":[153],"show":[154],"why":[155],"prevailing":[156],"fail":[159],"measure":[161],"them,":[162],"outline":[164],"disposition-respecting,":[166],"scientifically":[167],"defensible":[168],"would":[171],"require.":[172]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
