{"id":"https://openalex.org/W4400524654","doi":"https://doi.org/10.1145/3626772.3657778","title":"CorpusLM: Towards a Unified Language Model on Corpus for Knowledge-Intensive Tasks","display_name":"CorpusLM: Towards a Unified Language Model on Corpus for Knowledge-Intensive Tasks","publication_year":2024,"publication_date":"2024-07-10","ids":{"openalex":"https://openalex.org/W4400524654","doi":"https://doi.org/10.1145/3626772.3657778"},"language":"en","primary_location":{"id":"doi:10.1145/3626772.3657778","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626772.3657778","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626772.3657778?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3626772.3657778?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100667935","display_name":"Xiaoxi Li","orcid":"https://orcid.org/0009-0003-0708-418X"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoxi Li","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-0708-418X","affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010558184","display_name":"Zhicheng Dou","orcid":"https://orcid.org/0000-0002-9781-948X"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhicheng Dou","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9781-948X","affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032133163","display_name":"Yujia Zhou","orcid":"https://orcid.org/0000-0002-3530-3787"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujia Zhou","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3530-3787","affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103326017","display_name":"Fangchao Liu","orcid":"https://orcid.org/0009-0000-6646-1369"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangchao Liu","raw_affiliation_strings":["Huawei Poisson Lab, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-6646-1369","affiliations":[{"raw_affiliation_string":"Huawei Poisson Lab, Beijing, China","institution_ids":["https://openalex.org/I2250955327"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.2577,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.94786716,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"26","last_page":"37"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.813626766204834},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5624891519546509},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4461214244365692},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4426132142543793},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.32186704874038696}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.813626766204834},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5624891519546509},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4461214244365692},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4426132142543793},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.32186704874038696}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3626772.3657778","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626772.3657778","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626772.3657778?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3626772.3657778","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626772.3657778","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626772.3657778?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6200000047683716,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1899978318","display_name":null,"funder_award_id":"L233008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G228125052","display_name":null,"funder_award_id":"62272467","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G840943008","display_name":null,"funder_award_id":"No. 62272467","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322499","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92"},{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4400524654.pdf","grobid_xml":"https://content.openalex.org/works/W4400524654.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W2594284271","https://openalex.org/W2912924812","https://openalex.org/W2962881743","https://openalex.org/W2963339397","https://openalex.org/W2963961878","https://openalex.org/W2979826702","https://openalex.org/W2981852735","https://openalex.org/W3021397474","https://openalex.org/W3081168214","https://openalex.org/W3099700870","https://openalex.org/W3129831491","https://openalex.org/W3175475697","https://openalex.org/W3185146124","https://openalex.org/W3214546336","https://openalex.org/W4224308101","https://openalex.org/W4252076394","https://openalex.org/W4283315779","https://openalex.org/W4287122359","https://openalex.org/W4288089799","https://openalex.org/W4292215729","https://openalex.org/W4301243929","https://openalex.org/W4307079201","https://openalex.org/W4309674289","https://openalex.org/W4310923309","https://openalex.org/W4362515116","https://openalex.org/W4364383092","https://openalex.org/W4367628274","https://openalex.org/W4378509449","https://openalex.org/W4382491289","https://openalex.org/W4385567756","https://openalex.org/W4385568240","https://openalex.org/W4385571319","https://openalex.org/W4385571915","https://openalex.org/W4387596482","https://openalex.org/W4387848863","https://openalex.org/W4389520393","https://openalex.org/W4389524176","https://openalex.org/W6600062020","https://openalex.org/W6600168044","https://openalex.org/W6600339963","https://openalex.org/W6615264008","https://openalex.org/W6810081322"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Large":[0],"language":[1,58,93],"models":[2,59,221],"(LLMs)":[3],"have":[4],"gained":[5],"significant":[6],"attention":[7],"in":[8,16,71,222],"various":[9,101],"fields":[10],"but":[11],"prone":[12],"to":[13,32,99,123,157,183,190],"hallucination,":[14],"especially":[15],"knowledge-intensive":[17,102],"(KI)":[18],"tasks.":[19,50,73,192,227],"To":[20],"address":[21],"this,":[22],"retrieval-augmented":[23],"generation":[24,144,167],"(RAG)":[25],"has":[26],"emerged":[27],"as":[28],"a":[29,91,113,140,153,164],"popular":[30],"solution":[31],"enhance":[33],"factual":[34],"accuracy.":[35],"However,":[36,74],"traditional":[37],"retrieval":[38,56,72,126,159,224],"modules":[39],"often":[40],"rely":[41],"on":[42,197],"large":[43],"document":[44,65],"index":[45],"and":[46,80,110,127,129,172,187,211,225],"disconnect":[47],"with":[48,203],"generative":[49,55,106],"With":[51],"the":[52,75,120,131,198,216],"advent":[53],"of":[54,134,206,219],"(GR),":[57],"can":[60],"retrieve":[61],"by":[62,104,149],"directly":[63,150],"generating":[64],"identifiers":[66],"(DocIDs),":[67],"offering":[68],"superior":[69,217],"performance":[70,218],"potential":[76],"relationship":[77],"between":[78],"GR":[79,148],"downstream":[81,191,226],"tasks":[82,103],"remains":[83],"unexplored.":[84],"In":[85],"this":[86],"paper,":[87],"we":[88],"propose":[89],"CorpusLM,":[90],"unified":[92,114],"model":[94],"that":[95],"leverages":[96],"external":[97],"corpus":[98],"tackle":[100],"integrating":[105],"retrieval,":[107],"closed-book":[108],"generation,":[109,128],"RAG":[111],"through":[112],"greedy":[115],"decoding":[116],"process.":[117],"We":[118,138,162,176,193],"design":[119,163],"following":[121],"mechanisms":[122],"facilitate":[124],"effective":[125,171],"improve":[130,158],"end-to-end":[132],"effectiveness":[133],"KI":[135],"tasks:":[136],"(1)":[137],"develop":[139],"ranking-oriented":[141],"DocID":[142,154,180,185],"list":[143],"strategy,":[145,168],"which":[146,169],"refines":[147],"learning":[151],"from":[152],"ranking":[155],"list,":[156],"quality.":[160],"(2)":[161],"continuous":[165],"DocIDs-References-Answer":[166],"facilitates":[170],"efficient":[173],"RAG.":[174],"(3)":[175],"employ":[177],"well-designed":[178],"unsupervised":[179],"understanding":[181],"tasks,":[182],"comprehend":[184],"semantics":[186],"their":[188],"relevance":[189],"evaluate":[194],"our":[195,220],"approach":[196],"widely":[199],"used":[200],"KILT":[201],"benchmark":[202],"two":[204],"variants":[205],"backbone":[207],"models,":[208],"i.e.,":[209],"T5":[210],"Llama2.":[212],"Experimental":[213],"results":[214],"demonstrate":[215],"both":[223]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":10}],"updated_date":"2026-06-13T07:54:00.901334","created_date":"2025-10-10T00:00:00"}
