{"id":"https://openalex.org/W7159657815","doi":"https://doi.org/10.48550/arxiv.2604.27374","title":"Measurement Risk in Supervised Financial NLP: Rubric and Metric Sensitivity on JF-ICR","display_name":"Measurement Risk in Supervised Financial NLP: Rubric and Metric Sensitivity on JF-ICR","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159657815","doi":"https://doi.org/10.48550/arxiv.2604.27374"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27374","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27374","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27374","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134868122","display_name":"Sidi Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Sidi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134823734","display_name":"Peiying Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Peiying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134951013","display_name":"Yuxiao 
Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuxiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134936542","display_name":"Rongdong Chai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chai, Rongdong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10081","display_name":"Auditing, Earnings Management, Governance","score":0.38089999556541443,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10081","display_name":"Auditing, Earnings Management, Governance","score":0.38089999556541443,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.18549999594688416,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer 
Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.0778999999165535,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.9114000201225281},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.8116000294685364},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6614000201225281},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6301000118255615},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5662000179290771},{"id":"https://openalex.org/keywords/earnings","display_name":"Earnings","score":0.4814000129699707},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.36660000681877136},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3582000136375427},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.3411000072956085}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.9114000201225281},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.8116000294685364},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark 
(surveying)","level":2,"score":0.6614000201225281},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6301000118255615},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5662000179290771},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5180000066757202},{"id":"https://openalex.org/C2781426361","wikidata":"https://www.wikidata.org/wiki/Q5326940","display_name":"Earnings","level":2,"score":0.4814000129699707},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47620001435279846},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41819998621940613},{"id":"https://openalex.org/C162118730","wikidata":"https://www.wikidata.org/wiki/Q1128453","display_name":"Actuarial science","level":1,"score":0.40849998593330383},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.3921000063419342},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.36660000681877136},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game 
theory)","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3073999881744385},{"id":"https://openalex.org/C40993552","wikidata":"https://www.wikidata.org/wiki/Q514654","display_name":"Gold standard (test)","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.2955999970436096},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2741999924182892},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.26669999957084656},{"id":"https://openalex.org/C2776372474","wikidata":"https://www.wikidata.org/wiki/Q508291","display_name":"Simplicity","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set 
theory)","level":3,"score":0.2619999945163727},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.2556000053882599},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2547000050544739},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27374","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27374","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27374","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27374","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell 
University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"LLMs":[1,77],"become":[2,198],"credible":[3],"readers":[4],"of":[5],"earnings":[6],"calls,":[7],"investor-relations":[8],"Q&A,":[9],"guidance,":[10],"and":[11,26,139,162,183,208,247],"disclosure":[12],"language,":[13],"supervised":[14,240],"financial":[15,241],"NLP":[16],"benchmarks":[17,242],"increasingly":[18],"function":[19],"as":[20],"decision":[21],"evidence":[22,37],"for":[23,239],"model":[24],"selection":[25],"deployment.":[27],"A":[28],"hidden":[29],"assumption":[30,40],"is":[31,48,118,125,154,169,230],"that":[32],"gold":[33,244],"labels":[34,245],"make":[35],"such":[36],"objective.":[38],"This":[39,116],"breaks":[41],"down":[42],"when":[43],"the":[44,106,110,132,148,163,173,188,213,218,225],"benchmark":[45],"ruler":[46,250],"itself":[47],"sensitive":[49],"to":[50,103],"rubric":[51,92,134],"wording,":[52],"metric":[53,144,215],"choice,":[54],"or":[55],"aggregation":[56],"policy.":[57],"We":[58],"study":[59],"this":[60,203],"measurement":[61],"risk":[62],"on":[63,212,224],"Japanese":[64],"Financial":[65],"Implicit-Commitment":[66],"Recognition":[67],"(JF-ICR;":[68],"a":[69,121,127,232,236],"pinned":[70],"253-item":[71],"test":[72],"split":[73],"x":[74,78,81,84],"4":[75],"frontier":[76],"5":[79,85],"rubrics":[80],"3":[82],"temperatures":[83],"ordinal":[86],"metrics).":[87],"Three":[88],"findings":[89],"follow.":[90],"First,":[91],"wording":[93],"materially":[94],"changes":[95],"model-assigned":[96],"labels:":[97],"R2--R3":[98],"agreement":[99],"ranges":[100],"from
":[101],"70.0%":[102],"83.4%,":[104],"with":[105,120],"dominant":[107],"movement":[108],"near":[109,158],"+1":[111],"/":[112],"0":[113],"implicit-commitment":[114],"boundary.":[115],"pattern":[117],"consistent":[119],"pragmatic-boundary":[122],"interpretation,":[123],"but":[124,235],"not":[126,142,231],"validated":[128],"linguistic-causality":[129],"claim":[130],"because":[131,157,172],"present":[133],"variants":[135],"confound":[136],"semantics,":[137],"examples,":[138],"verbosity.":[140],"Second,":[141],"every":[143],"remains":[145],"informative":[146],"under":[147,191],"JF-ICR":[149],"class":[150,165,175],"distribution.":[151],"Within-one":[152],"accuracy":[153,168],"too":[155,170],"easy":[156],"misses":[159],"receive":[160],"credit":[161],"majority":[164],"dominates;":[166],"worst-class":[167],"noisy":[171],"rarest":[174],"has":[176],"only":[177,201],"two":[178],"examples.":[179],"Exact":[180],"accuracy,":[181],"macro-F1,":[182],"weighted":[184],"κ":[185],"are":[186],"therefore":[187],"identifiable":[189,214],"metrics":[190],"our":[192],"operational":[193],"rule.":[194],"Third,":[195],"ranking":[196],"claims":[197],"more":[199],"defensible":[200],"after":[202],"metric-identifiability":[204],"audit:":[205],"Bradley--Terry,":[206],"Borda,":[207],"Ranked":[209],"Pairs":[210],"agree":[211],"subset,":[216],"while":[217],"full":[219],"five-metric":[220],"sweep":[221],"produces":[222],"disagreement":[223],"closest":[226],"pair.":[227],"The":[228],"contribution":[229],"new":[233],"leaderboard,":[234],"reporting":[237],"discipline":[238],"whose":[243,248],"exist":[246],"evaluation":[249],"still":[251],"requires":[252],"governance.":[253]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-02T00:00:00"}
