{"id":"https://openalex.org/W7152588583","doi":"https://doi.org/10.1145/3774904.3792540","title":"Reinforcement Learning with Verbalized Probabilities for LLM Classification","display_name":"Reinforcement Learning with Verbalized Probabilities for LLM Classification","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7152588583","doi":"https://doi.org/10.1145/3774904.3792540"},"language":null,"primary_location":{"id":"doi:10.1145/3774904.3792540","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3774904.3792540","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3774904.3792540","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121251703","display_name":"Liyao Li","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liyao Li","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0005-5235-1982","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hao Chen","orcid":"https://orcid.org/0000-0002-1560-825X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-1560-825X","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133264065","display_name":"Jiaming Tian","orcid":"https://orcid.org/0009-0002-3224-4803"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaming Tian","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0002-3224-4803","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121294507","display_name":"Wentao Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wentao Ye","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0007-7058-1604","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107568528","display_name":"Lirong Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lirong Gao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0005-1444-8505","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102863976","display_name":"Chao Ye","orcid":"https://orcid.org/0009-0006-1356-8246"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Ye","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0006-1356-8246","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101571136","display_name":"Ningtao Wang","orcid":"https://orcid.org/0009-0005-6577-5047"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ningtao Wang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0005-6577-5047","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019052047","display_name":"Xing Fu","orcid":"https://orcid.org/0000-0002-3536-2779"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing Fu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-3536-2779","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101580520","display_name":"Yu Cheng","orcid":"https://orcid.org/0000-0001-5469-3509"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu Cheng","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-5469-3509","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haobo Wang","orcid":"https://orcid.org/0000-0001-8586-3048"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haobo Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-8586-3048","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Gang Chen","orcid":"https://orcid.org/0000-0002-7483-0045"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7483-0045","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034520734","display_name":"Junbo Zhao","orcid":"https://orcid.org/0000-0002-3637-2936"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junbo Zhao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-3637-2936","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5121251703"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.78841427,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7401","last_page":"7411"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.14309999346733093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.14309999346733093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.10220000147819519,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.07100000232458115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.35839998722076416},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.31540000438690186},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.298799991607666},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.2793000042438507},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.266400009393692}],"concepts":[{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4763999879360199},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4494999945163727},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3953000009059906},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3330000042915344},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3774904.3792540","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3774904.3792540","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3774904.3792540","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3774904.3792540","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6750065684318542,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2183341477","https://openalex.org/W3153427360","https://openalex.org/W4214864778","https://openalex.org/W4402670143","https://openalex.org/W4402671665","https://openalex.org/W4404782672","https://openalex.org/W4415797679"],"related_works":[],"abstract_inverted_index":{"While":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"excel":[5],"at":[6],"many":[7],"reasoning":[8,45],"tasks,":[9,124],"their":[10,20,41],"native":[11],"inability":[12],"to":[13,34,78,141,169,191],"produce":[14],"calibrated,":[15],"multi-class":[16],"probability":[17,88],"distributions":[18],"limits":[19],"use":[21],"in":[22,97],"high-stakes":[23],"Web":[24,157,193],"applications":[25],"like":[26],"content":[27],"moderation":[28],"and":[29,63,84,188,205],"fraud":[30],"detection.":[31],"Existing":[32],"methods":[33],"elicit":[35],"probabilities":[36,107],"from":[37,49,108],"LLMs":[38],"either":[39],"sacrifice":[40],"crucial":[42],"Chain-of-Thought":[43],"(CoT)":[44],"capabilities":[46],"or":[47,170],"suffer":[48],"poor":[50],"calibration.":[51],"To":[52],"address":[53],"this,":[54],"we":[55,125],"introduce":[56],"a":[57,64,85,113,128,134,183,198],"new":[58],"paradigm,":[59],"Verbalized":[60,72],"Probability":[61],"Distribution,":[62],"novel":[65],"training":[66,120],"framework,":[67],"RLVP":[68,74],"(Reinforcement":[69],"Learning":[70,100],"with":[71,163],"Probabilities).":[73],"fine-tunes":[75],"an":[76,81],"LLM":[77],"generate":[79],"both":[80],"interpretable":[82,206],"CoT":[83],"complete,":[86],"verbalized":[87],"distribution.":[89],"We":[90],"overcome":[91],"the":[92,147,177,210],"''insufficient":[93],"reward":[94,115],"granularity''":[95],"problem":[96],"standard":[98],"Reinforcement":[99],"(RL)":[101],"for":[102,186,209],"classification":[103],"by":[104],"using":[105],"soft":[106],"expert":[109,173],"tabular":[110,123],"models":[111,174,208],"as":[112],"dense":[114],"curriculum.":[116],"Through":[117],"large-scale":[118],"joint":[119],"on":[121,139,154,176],"169":[122],"demonstrate":[126],"that":[127,159],"single":[129],"RLVP-trained":[130],"model":[131,149],"can":[132],"surpass":[133],"strong,":[135],"task-specific":[136],"XGBoost":[137],"baseline":[138],"up":[140],"55%":[142],"of":[143],"tasks.":[144],"More":[145],"importantly,":[146],"trained":[148,175],"achieves":[150],"state-of-the-art":[151],"few-shot":[152],"performance":[153,167],"unseen,":[155],"heterogeneous":[156],"benchmarks":[158],"mix":[160],"structured":[161],"data":[162],"free":[164],"text,":[165],"achieving":[166],"comparable":[168],"superior":[171],"than":[172],"same":[178],"limited":[179],"data.":[180,194],"This":[181],"showcases":[182],"strong":[184],"capability":[185],"generalization":[187],"knowledge":[189],"transfer":[190],"complex":[192],"Our":[195],"work":[196],"presents":[197],"viable":[199],"path":[200],"toward":[201],"building":[202],"general-purpose,":[203],"probabilistically-sound,":[204],"foundation":[207],"Web.":[211]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-10T00:00:00"}
