{"id":"https://openalex.org/W4416036893","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.166","title":"Are Generative Models Underconfident? Better Quality Estimation with Boosted Model Probability","display_name":"Are Generative Models Underconfident? Better Quality Estimation with Boosted Model Probability","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036893","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.166"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.166","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.166","pdf_url":"https://aclanthology.org/2025.emnlp-main.166.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.166.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018054307","display_name":"Tu Anh Dinh","orcid":"https://orcid.org/0000-0001-7651-820X"},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Tu Anh Dinh","raw_affiliation_strings":["Karlsruhe Institute of Technology Karlsruhe , Germany"],"affiliations":[{"raw_affiliation_string":"Karlsruhe Institute of Technology Karlsruhe , Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046084081","display_name":"Jan Niehues","orcid":"https://orcid.org/0000-0002-4231-6543"},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Niehues","raw_affiliation_strings":["Karlsruhe Institute of Technology Karlsruhe , Germany"],"affiliations":[{"raw_affiliation_string":"Karlsruhe Institute of Technology Karlsruhe , Germany","institution_ids":["https://openalex.org/I102335020"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5018054307"],"corresponding_institution_ids":["https://openalex.org/I102335020"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18542138,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3364","last_page":"3382"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1808999925851822,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1808999925851822,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.12129999697208405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.045499999076128006,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4952000081539154},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.42250001430511475},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.41769999265670776},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.38429999351501465},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.32339999079704285},{"id":"https://openalex.org/keywords/empirical-probability","display_name":"Empirical probability","score":0.3100999891757965},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3086000084877014}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5572999715805054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5515999794006348},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4952000081539154},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.42250001430511475},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.41769999265670776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.388700008392334},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.38429999351501465},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.32339999079704285},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.31859999895095825},{"id":"https://openalex.org/C97933134","wikidata":"https://www.wikidata.org/wiki/Q5374249","display_name":"Empirical probability","level":4,"score":0.3100999891757965},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C167928553","wikidata":"https://www.wikidata.org/wiki/Q1376021","display_name":"Estimation theory","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2856000065803528},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.166","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.166","pdf_url":"https://aclanthology.org/2025.emnlp-main.166.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.166","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.166","pdf_url":"https://aclanthology.org/2025.emnlp-main.166.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036893.pdf","grobid_xml":"https://content.openalex.org/works/W4416036893.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Quality":[0],"Estimation":[1],"(QE)":[2],"is":[3,17,27,102],"estimating":[4],"the":[5,8,14,23,28,37,56,85],"quality":[6,21],"of":[7,40],"model":[9,107],"output":[10,20,25,38,47,69,95],"during":[11],"inference":[12],"when":[13],"ground":[15],"truth":[16],"not":[18,65],"available.Deriving":[19],"from":[22],"models'":[24],"probability":[26,39,57,63,108],"most":[29],"trivial":[30],"and":[31],"low-effort":[32],"way.However,":[33],"we":[34,74],"show":[35],"that":[36],"text-generation":[41],"models":[42],"can":[43,50],"appear":[44],"underconfident.At":[45],"each":[46],"step,":[48],"there":[49,91],"be":[51],"multiple":[52,93],"correct":[53],"options,":[54],"making":[55],"distribution":[58],"spread":[59],"out":[60],"more.Thus,":[61],"lower":[62,68],"does":[64],"necessarily":[66],"mean":[67],"quality.Due":[70],"to":[71,120,126],"this":[72],"observation,":[73],"propose":[75],"a":[76],"QE":[77,136],"approach":[78],"called":[79],"BOOSTEDPROB":[80,101],"1":[81],",":[82],"which":[83],"boosts":[84],"model's":[86],"confidence":[87],"in":[88,99,109,117,137],"cases":[89],"where":[90],"are":[92],"viable":[94],"options.With":[96],"no":[97],"increase":[98],"complexity,":[100],"notably":[103],"better":[104],"than":[105],"raw":[106],"different":[110],"settings,":[111],"achieving":[112],"on":[113],"average":[114],"+0.194":[115],"improvement":[116],"Pearson":[118],"correlation":[119],"groundtruth":[121],"quality.It":[122],"also":[123],"comes":[124],"close":[125],"or":[127,134],"outperforms":[128],"more":[129],"costly":[130],"approaches":[131],"like":[132],"supervised":[133],"ensemble-based":[135],"certain":[138],"settings.":[139]},"counts_by_year":[],"updated_date":"2026-03-10T14:07:55.174380","created_date":"2025-11-08T00:00:00"}
