{"id":"https://openalex.org/W4412377060","doi":"https://doi.org/10.1145/3726302.3730047","title":"OBELLA: Open the Book for Evaluating Long-Form Large Language Model Answers in Open-Domain Question Answering","display_name":"OBELLA: Open the Book for Evaluating Long-Form Large Language Model Answers in Open-Domain Question Answering","publication_year":2025,"publication_date":"2025-07-13","ids":{"openalex":"https://openalex.org/W4412377060","doi":"https://doi.org/10.1145/3726302.3730047"},"language":"en","primary_location":{"id":"doi:10.1145/3726302.3730047","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730047","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730047","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730047","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070212925","display_name":"Tianyu Ren","orcid":"https://orcid.org/0009-0008-1691-5690"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Tianyu Ren","raw_affiliation_strings":["Queen's University Belfast, Belfast, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen's University Belfast, Belfast, United Kingdom","institution_ids":["https://openalex.org/I126231945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101694076","display_name":"Zhaoyu Zhang","orcid":"https://orcid.org/0000-0003-4303-2806"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhaoyu Zhang","raw_affiliation_strings":["Queen's University Belfast, Belfast, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen's University Belfast, Belfast, United Kingdom","institution_ids":["https://openalex.org/I126231945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100460917","display_name":"Hui Wang","orcid":"https://orcid.org/0000-0003-2633-6015"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hui Wang","raw_affiliation_strings":["Queen's University Belfast, Belfast, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen's University Belfast, Belfast, United Kingdom","institution_ids":["https://openalex.org/I126231945"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023499716","display_name":"Karen Rafferty","orcid":"https://orcid.org/0000-0002-7443-7876"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Karen Rafferty","raw_affiliation_strings":["Queen's University Belfast, Belfast, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen's University Belfast, Belfast, United Kingdom","institution_ids":["https://openalex.org/I126231945"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5070212925"],"corresponding_institution_ids":["https://openalex.org/I126231945"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08561575,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1109","last_page":"1119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.8597126007080078},{"id":"https://openalex.org/keywords/open-domain","display_name":"Open domain","score":0.8480544090270996},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7009259462356567},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5264958739280701},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4857359528541565},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4848981499671936},{"id":"https://openalex.org/keywords/closed-ended-question","display_name":"Closed-ended question","score":0.466599702835083},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.41989603638648987},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41182056069374084},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2837089002132416},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09815222024917603},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.09224247932434082}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.8597126007080078},{"id":"https://openalex.org/C2993776861","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Open domain","level":3,"score":0.8480544090270996},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7009259462356567},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5264958739280701},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4857359528541565},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4848981499671936},{"id":"https://openalex.org/C193035329","wikidata":"https://www.wikidata.org/wiki/Q17007046","display_name":"Closed-ended question","level":2,"score":0.466599702835083},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41989603638648987},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41182056069374084},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2837089002132416},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09815222024917603},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.09224247932434082},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3726302.3730047","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730047","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730047","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.qub.ac.uk/portal:openaire/6c345035-d082-4c2c-ab5c-4c67a96c6734","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/6c345035-d082-4c2c-ab5c-4c67a96c6734","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ren, T, Zhang, Z, Wang, H & Rafferty, K 2025, OBELLA: open the book for evaluating long-form large language model answers in open-domain question answering. in SIGIR '25: Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval. SIGIR 2025 - Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval, Association for Computing Machinery, pp. 1109-1119, 48th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2025, Padua, Italy, 13/07/2025. https://doi.org/10.1145/3726302.3730047","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.1145/3726302.3730047","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730047","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730047","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412377060.pdf","grobid_xml":"https://content.openalex.org/works/W4412377060.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W2053154970","https://openalex.org/W2101105183","https://openalex.org/W2112644606","https://openalex.org/W2125436846","https://openalex.org/W2194775991","https://openalex.org/W2612690371","https://openalex.org/W2891012317","https://openalex.org/W2912924812","https://openalex.org/W2963339397","https://openalex.org/W2963748441","https://openalex.org/W2964068236","https://openalex.org/W2990138404","https://openalex.org/W3034850762","https://openalex.org/W3091824185","https://openalex.org/W3100292568","https://openalex.org/W3156789018","https://openalex.org/W3169138743","https://openalex.org/W4205924343","https://openalex.org/W4252076394","https://openalex.org/W4385569780","https://openalex.org/W4385570556","https://openalex.org/W4385571412","https://openalex.org/W4385572714","https://openalex.org/W4389523719","https://openalex.org/W4392669702","https://openalex.org/W4393300286","https://openalex.org/W4396823873","https://openalex.org/W4400524654","https://openalex.org/W4400529101","https://openalex.org/W4402670684","https://openalex.org/W4402671562","https://openalex.org/W4404783040","https://openalex.org/W4409347896","https://openalex.org/W6600075554","https://openalex.org/W6604820196"],"related_works":["https://openalex.org/W2951097643","https://openalex.org/W4309395021","https://openalex.org/W3091989500","https://openalex.org/W4406245437","https://openalex.org/W3215363805","https://openalex.org/W204133468","https://openalex.org/W2991310128","https://openalex.org/W4307481286","https://openalex.org/W2391533720","https://openalex.org/W3216306357"],"abstract_inverted_index":{"Reliable":[0],"factuality":[1,104,222],"evaluation":[2,105,192,212],"is":[3],"critical":[4],"for":[5,27,111,139,160],"the":[6,17,90,117,124],"iterative":[7],"development":[8],"of":[9,19,92],"open-domain":[10],"question":[11],"answering":[12],"(ODQA)":[13],"systems,":[14],"especially":[15],"given":[16,85],"rise":[18],"large":[20],"language":[21],"models":[22],"(LLMs)":[23],"and":[24,63,71,186],"their":[25],"propensity":[26],"hallucination.":[28],"However,":[29],"state-of-the-art":[30],"(SOTA)":[31],"automatic":[32],"metrics,":[33],"which":[34],"are":[35],"mostly":[36],"supervised,":[37],"remain":[38],"notably":[39],"less":[40],"reliable":[41],"than":[42],"humans.":[43],"In":[44],"this":[45,53],"paper,":[46],"we":[47,99,166],"find":[48],"two":[49,210],"key":[50],"challenges":[51],"behind":[52],"gap:":[54],"(1)":[55],"length":[56,118,127],"distribution":[57,119],"mismatch":[58,120],"between":[59],"lengthy":[60],"LLM":[61,93,113,131],"answers":[62,66,82,144],"shorter":[64],"training":[65],"used":[67],"by":[68,89,121,156],"current":[69,76],"metrics;":[70],"(2)":[72],"reference":[73,147,196],"incompleteness,":[74],"where":[75],"metrics":[77],"often":[78],"misjudge":[79],"valid":[80],"system":[81],"absent":[83],"from":[84,149],"references-a":[86],"challenge":[87],"worsened":[88],"diversity":[91],"outputs.":[94,132],"To":[95],"address":[96],"these":[97],"issues,":[98],"present":[100],"a":[101,136,168,177,188,201,215],"new":[102,202],"ODQA":[103,211,221],"dataset":[106],"called":[107],"OBELLA":[108,115],"(Open-Book":[109],"Evaluation":[110],"Long-form":[112],"Answers).":[114],"narrows":[116],"significantly":[122],"increasing":[123],"candidate":[125,143,183],"answer":[126,184],"to":[128,145,180,194],"align":[129],"with":[130,206],"Moreover,":[133],"it":[134],"introduces":[135],"neutral":[137],"class":[138],"plausible":[140],"yet":[141],"under-supported":[142],"differentiate":[146],"incompleteness":[148],"outright":[150],"incorrectness,":[151],"thus":[152],"enabling":[153],"flexible":[154],"reevaluation":[155],"consulting":[157],"external":[158],"knowledge":[159],"more":[161,219],"references.":[162],"Based":[163],"on":[164],"OBELLA,":[165],"propose":[167],"novel":[169],"metric":[170],"named":[171],"OBELLAM":[172,175,199],"(OBELLA":[173],"Metric).":[174],"integrates":[176],"cross-attention":[178],"mechanism":[179],"enhance":[181],"long-form":[182],"representations":[185],"employs":[187],"dynamic":[189],"closed-open":[190],"book":[191],"strategy":[193],"tackle":[195],"incompleteness.":[197],"Our":[198],"sets":[200],"SOTA":[203],"in":[204],"aligning":[205],"human":[207],"judgments":[208],"across":[209],"benchmarks,":[213],"marking":[214],"promising":[216],"step":[217],"toward":[218],"robust":[220],"evaluation.":[223]},"counts_by_year":[],"updated_date":"2026-03-15T09:29:46.208133","created_date":"2025-10-10T00:00:00"}
