{"id":"https://openalex.org/W4388685627","doi":"https://doi.org/10.48550/arxiv.2311.07383","title":"LM-Polygraph: Uncertainty Estimation for Language Models","display_name":"LM-Polygraph: Uncertainty Estimation for Language Models","publication_year":2023,"publication_date":"2023-11-13","ids":{"openalex":"https://openalex.org/W4388685627","doi":"https://doi.org/10.48550/arxiv.2311.07383"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2311.07383","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.07383","pdf_url":"https://arxiv.org/pdf/2311.07383","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2311.07383","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102590169","display_name":"Ekaterina Fadeeva","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fadeeva, Ekaterina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046928528","display_name":"Roman Vashurin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vashurin, Roman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026319614","display_name":"Akim Tsvigun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsvigun, Akim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054388208","display_name":"Artem Vazhentsev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vazhentsev, Artem","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059760393","display_name":"Sergey Petrakov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Petrakov, Sergey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042032750","display_name":"Kirill Fedyanin","orcid":"https://orcid.org/0000-0003-0363-9195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fedyanin, Kirill","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113067486","display_name":"Daniil Vasilev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasilev, Daniil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030766486","display_name":"Elizaveta Goncharova","orcid":"https://orcid.org/0000-0001-8358-9647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goncharova, Elizaveta","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026157285","display_name":"Alexander Panchenko","orcid":"https://orcid.org/0000-0001-6097-6118"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panchenko, Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058551285","display_name":"Maxim S. Panov","orcid":"https://orcid.org/0000-0002-6817-407X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panov, Maxim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103085805","display_name":"Timothy Baldwin","orcid":"https://orcid.org/0000-0002-4445-1386"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baldwin, Timothy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5071397402","display_name":"Artem Shelmanov","orcid":"https://orcid.org/0000-0002-2151-6212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shelmanov, Artem","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5102590169"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9545000195503235,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/polygraph","display_name":"Polygraph","score":0.7387505769729614},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5736854076385498},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4175504744052887},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.4113801121711731},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.4112755060195923},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.41041818261146545},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.31945139169692993},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.2245919406414032},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.21179312467575073},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.20998480916023254},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18827879428863525},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.10183095932006836}],"concepts":[{"id":"https://openalex.org/C37428701","wikidata":"https://www.wikidata.org/wiki/Q1686381","display_name":"Polygraph","level":2,"score":0.7387505769729614},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5736854076385498},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4175504744052887},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.4113801121711731},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.4112755060195923},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.41041818261146545},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.31945139169692993},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2245919406414032},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.21179312467575073},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.20998480916023254},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18827879428863525},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.10183095932006836},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2311.07383","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.07383","pdf_url":"https://arxiv.org/pdf/2311.07383","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2311.07383","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2311.07383","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2311.07383","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.07383","pdf_url":"https://arxiv.org/pdf/2311.07383","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5799999833106995,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2416302503","https://openalex.org/W2376108705","https://openalex.org/W2359820751","https://openalex.org/W2905865776","https://openalex.org/W2355319129","https://openalex.org/W2603666774","https://openalex.org/W2569483408","https://openalex.org/W2522864199","https://openalex.org/W646170675","https://openalex.org/W4387912515"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,20,108,116],"the":[3,12,44,139,156],"capabilities":[4],"of":[5,17,46,64,99,102,127,173],"large":[6],"language":[7],"models":[8,30],"(LLMs)":[9],"have":[10],"paved":[11],"way":[13],"for":[14,73,106,124],"a":[15,24,95,100,133],"myriad":[16],"groundbreaking":[18],"applications":[19],"various":[21],"fields.":[22],"However,":[23,66],"significant":[25],"challenge":[26],"arises":[27],"as":[28],"these":[29],"often":[31],"\"hallucinate\",":[32],"i.e.,":[33],"fabricate":[34],"facts":[35],"without":[36],"providing":[37],"users":[38],"an":[39,121],"apparent":[40],"means":[41],"to":[42,56,67,148,169],"discern":[43,149],"veracity":[45],"their":[47],"statements.":[48],"Uncertainty":[49],"estimation":[50],"(UE)":[51],"methods":[52,72,105],"are":[53],"one":[54],"path":[55],"safer,":[57],"more":[58,61],"responsible,":[59],"and":[60,132,164,166],"effective":[62],"use":[63],"LLMs.":[65],"date,":[68],"research":[69],"on":[70,79],"UE":[71,104,128],"LLMs":[74,107],"has":[75],"been":[76],"focused":[77],"primarily":[78],"theoretical":[80],"rather":[81],"than":[82],"engineering":[83],"contributions.":[84],"In":[85],"this":[86,90],"work,":[87],"we":[88],"tackle":[89],"issue":[91],"by":[92,130],"introducing":[93],"LM-Polygraph,":[94],"framework":[96],"with":[97,112,143,155],"implementations":[98],"battery":[101],"state-of-the-art":[103],"text":[109],"generation":[110],"tasks,":[111],"unified":[113],"program":[114],"interfaces":[115],"Python.":[117],"Additionally,":[118],"it":[119],"introduces":[120],"extendable":[122],"benchmark":[123],"consistent":[125],"evaluation":[126],"techniques":[129],"researchers,":[131],"demo":[134],"web":[135],"application":[136],"that":[137],"enriches":[138],"standard":[140],"chat":[141],"dialog":[142],"confidence":[144],"scores,":[145],"empowering":[146],"end-users":[147],"unreliable":[150],"responses.":[151],"LM-Polygraph":[152],"is":[153,167],"compatible":[154],"most":[157],"recent":[158],"LLMs,":[159],"including":[160],"BLOOMz,":[161],"LLaMA-2,":[162],"ChatGPT,":[163],"GPT-4,":[165],"designed":[168],"support":[170],"future":[171],"releases":[172],"similarly-styled":[174],"LMs.":[175]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
