{"id":"https://openalex.org/W7116922770","doi":"https://doi.org/10.48550/arxiv.2512.17409","title":"meval: A Statistical Toolbox for Fine-Grained Model Performance Analysis","display_name":"meval: A Statistical Toolbox for Fine-Grained Model Performance Analysis","publication_year":2025,"publication_date":"2025-12-19","ids":{"openalex":"https://openalex.org/W7116922770","doi":"https://doi.org/10.48550/arxiv.2512.17409"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.17409","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.17409","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.17409","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119663294","display_name":"Dishantkumar Sutariya","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sutariya, Dishantkumar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5064219363","display_name":"Eike Petersen","orcid":"https://orcid.org/0000-0003-0097-3868"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Petersen, Eike","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5119663294"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12422","display_name":"Radiomics and Machine Learning in Medical Imaging","score":0.2630999982357025,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T12422","display_name":"Radiomics and Machine Learning in Medical Imaging","score":0.2630999982357025,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.155799999833107,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.03610000014305115,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/toolbox","display_name":"Toolbox","score":0.9247999787330627},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.548799991607666},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.42879998683929443},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.41179999709129333},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.40049999952316284},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.38100001215934753},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.3513000011444092}],"concepts":[{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.9247999787330627},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6402000188827515},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5572999715805054},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.548799991607666},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48240000009536743},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.42879998683929443},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.41179999709129333},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.40049999952316284},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.38100001215934753},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.3513000011444092},{"id":"https://openalex.org/C2986587452","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical analysis","level":2,"score":0.3402000069618225},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3269999921321869},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C3020318244","wikidata":"https://www.wikidata.org/wiki/Q4812187","display_name":"Large sample","level":2,"score":0.2538999915122986},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.17409","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.17409","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.17409","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.17409","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Analyzing":[0],"machine":[1],"learning":[2],"model":[3,23],"performance":[4,38,127],"stratified":[5],"by":[6,144],"patient":[7],"and":[8,16,54,62,82,114,163],"recording":[9],"properties":[10],"is":[11,34,134],"becoming":[12],"the":[13,84,95,132,145,160,171],"accepted":[14],"norm":[15],"often":[17],"yields":[18],"crucial":[19],"insights":[20],"about":[21],"important":[22],"failure":[24],"modes.":[25],"Performing":[26],"such":[27],"analyses":[28,142],"in":[29,68,83,149,154,165],"a":[30,107],"statistically":[31],"rigorous":[32],"manner":[33],"non-trivial,":[35],"however.":[36],"Appropriate":[37],"metrics":[39],"must":[40,59,90],"be":[41,60,65,77,91],"selected":[42],"that":[43,110],"allow":[44],"for":[45,124,137],"valid":[46],"comparisons":[47,64],"between":[48],"groups":[49],"of":[50,86],"different":[51],"sample":[52],"sizes":[53],"base":[55],"rates;":[56],"metric":[57],"uncertainty":[58],"determined":[61],"multiple":[63],"corrected":[66],"for,":[67],"order":[69],"to":[70,80,93,117],"assess":[71,121],"whether":[72],"any":[73],"observed":[74],"differences":[75],"may":[76],"purely":[78],"due":[79],"chance;":[81],"case":[85,151],"intersectional":[87],"analyses,":[88],"mechanisms":[89],"implemented":[92],"find":[94],"most":[96],"`interesting'":[97],"subgroups":[98],"within":[99],"combinatorially":[100],"many":[101],"subgroup":[102,126],"combinations.":[103],"We":[104],"here":[105],"present":[106],"statistical":[108],"toolbox":[109,133,146],"addresses":[111],"these":[112],"challenges":[113],"enables":[115],"practitioners":[116],"easily":[118],"yet":[119],"rigorously":[120],"their":[122],"models":[123],"potential":[125],"disparities.":[128],"While":[129],"broadly":[130],"applicable,":[131],"specifically":[135],"designed":[136],"medical":[138],"imaging":[139],"applications.":[140],"The":[141],"provided":[143],"are":[147],"illustrated":[148],"two":[150],"studies,":[152],"one":[153,164],"skin":[155],"lesion":[156],"malignancy":[157],"classification":[158,169],"on":[159,170],"ISIC2020":[161],"dataset":[162],"chest":[166],"X-ray-based":[167],"disease":[168],"MIMIC-CXR":[172],"dataset.":[173]},"counts_by_year":[],"updated_date":"2025-12-23T23:15:37.779995","created_date":"2025-12-23T00:00:00"}
