{"id":"https://openalex.org/W4406237011","doi":"https://doi.org/10.1162/tacl_a_00729","title":"<scp>CLAPnq</scp>: <u>C</u>ohesive <u>L</u>ong-form <u>A</u>nswers from <u>P</u>assages in Natural Questions for RAG systems","display_name":"<scp>CLAPnq</scp>: <u>C</u>ohesive <u>L</u>ong-form <u>A</u>nswers from <u>P</u>assages in Natural Questions for RAG systems","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4406237011","doi":"https://doi.org/10.1162/tacl_a_00729"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00729","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00729","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00729/2499744/tacl_a_00729.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00729/2499744/tacl_a_00729.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103058383","display_name":"Sara Rosenthal","orcid":"https://orcid.org/0000-0002-9603-4699"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sara Rosenthal","raw_affiliation_strings":["IBM Research AI, USA. sjrosenthal@us.ibm.com"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI, USA. sjrosenthal@us.ibm.com","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112335036","display_name":"Avirup Sil","orcid":"https://orcid.org/0000-0002-4753-3221"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Avirup Sil","raw_affiliation_strings":["IBM Research AI, USA. avi@us.ibm.com"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI, USA. avi@us.ibm.com","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113515054","display_name":"Radu Florian","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Radu Florian","raw_affiliation_strings":["IBM Research AI, USA. raduf@us.ibm.com"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI, USA. raduf@us.ibm.com","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043396055","display_name":"Salim Roukos","orcid":"https://orcid.org/0000-0003-2140-4349"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Salim Roukos","raw_affiliation_strings":["IBM Research AI, USA. roukos@us.ibm.com"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI, USA. roukos@us.ibm.com","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103058383"],"corresponding_institution_ids":["https://openalex.org/I1341412227"],"apc_list":null,"apc_paid":null,"fwci":22.8686,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.99288375,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"13","issue":null,"first_page":"53","last_page":"72"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.963699996471405,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4487782418727875}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4487782418727875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1162/tacl_a_00729","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00729","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00729/2499744/tacl_a_00729.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00729","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00729","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00729/2499744/tacl_a_00729.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4406237011.pdf"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W135190683","https://openalex.org/W2912924812","https://openalex.org/W2914304175","https://openalex.org/W2950681488","https://openalex.org/W2951434086","https://openalex.org/W2963323070","https://openalex.org/W2963748441","https://openalex.org/W2990138404","https://openalex.org/W2997723601","https://openalex.org/W3007672467","https://openalex.org/W3094041811","https://openalex.org/W3100292568","https://openalex.org/W3169283738","https://openalex.org/W3201174429","https://openalex.org/W4206821533","https://openalex.org/W4213009331","https://openalex.org/W4226059645","https://openalex.org/W4252076394","https://openalex.org/W4296557505","https://openalex.org/W4307079201","https://openalex.org/W4310923309","https://openalex.org/W4311731003","https://openalex.org/W4376643691","https://openalex.org/W4382618460","https://openalex.org/W4384918448","https://openalex.org/W4385571922","https://openalex.org/W4385573898","https://openalex.org/W4386566840","https://openalex.org/W4388717143","https://openalex.org/W4389520670","https://openalex.org/W4389984066","https://openalex.org/W4391591467","https://openalex.org/W4393147129","https://openalex.org/W4401042427","https://openalex.org/W6774222543","https://openalex.org/W6777615688","https://openalex.org/W6778883912","https://openalex.org/W6784828615","https://openalex.org/W6807151135","https://openalex.org/W6847076894","https://openalex.org/W6847200499","https://openalex.org/W6847303613","https://openalex.org/W6854866820","https://openalex.org/W6857079699","https://openalex.org/W6858023062","https://openalex.org/W6859201984","https://openalex.org/W6861737864","https://openalex.org/W7054915730"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Abstract":[0],"Retrieval":[1],"Augmented":[2],"Generation":[3],"(RAG)":[4],"has":[5],"become":[6],"a":[7,31,43,58,82],"popular":[8],"application":[9],"for":[10,41,64,144,155],"large":[11],"language":[12],"models.":[13],"It":[14],"is":[15,39,52,111,151,161],"preferable":[16],"that":[17,24,108,122,146],"successful":[18,135],"RAG":[19,45,67,92,126],"systems":[20],"provide":[21],"accurate":[22],"answers":[23,72,96],"are":[25,97,123],"supported":[26],"by":[27,115],"being":[28,47],"grounded":[29,74,158],"in":[30,157],"passage":[32,121],"without":[33],"any":[34],"hallucinations.":[35],"While":[36],"considerable":[37],"work":[38],"required":[40],"building":[42],"full":[44,66,91,103],"pipeline,":[46],"able":[48],"to":[49,84,130,133],"benchmark":[50,59],"performance":[51],"also":[53],"necessary.":[54],"We":[55,138],"present":[56,139],"CLAPnq,":[57],"Long-form":[60],"Question":[61],"Answering":[62],"dataset":[63],"the":[65,90,102,109,120],"pipeline.":[68,93],"CLAPnq":[69,95,145,160],"includes":[70],"long":[71],"with":[73],"gold":[75],"passages":[76],"from":[77],"Natural":[78],"Questions":[79],"(NQ)":[80],"and":[81,105,142],"corpus":[83],"perform":[85],"either":[86],"retrieval,":[87],"generation,":[88],"or":[89],"The":[94],"concise,":[98],"3x":[99],"smaller":[100],"than":[101],"passage,":[104],"cohesive,":[106],"meaning":[107],"answer":[110],"composed":[112],"fluently,":[113],"often":[114],"integrating":[116],"multiple":[117],"pieces":[118],"of":[119],"not":[124],"contiguous.":[125],"models":[127],"must":[128],"adapt":[129],"these":[131],"properties":[132],"be":[134],"at":[136,164],"CLAPnq.":[137],"baseline":[140],"experiments":[141],"analysis":[143],"highlight":[147],"areas":[148],"where":[149],"there":[150],"still":[152],"significant":[153],"room":[154],"improvement":[156],"RAG.":[159],"publicly":[162],"available":[163],"https://github.com/primeqa/clapnq.":[165]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":8}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
