{"id":"https://openalex.org/W4411113290","doi":"https://doi.org/10.18653/v1/2025.findings-naacl.46","title":"Large-Scale Corpus Construction and Retrieval-Augmented Generation for Ancient Chinese Poetry: New Method and Data Insights","display_name":"Large-Scale Corpus Construction and Retrieval-Augmented Generation for Ancient Chinese Poetry: New Method and Data Insights","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4411113290","doi":"https://doi.org/10.18653/v1/2025.findings-naacl.46"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-naacl.46","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-naacl.46","pdf_url":"https://aclanthology.org/2025.findings-naacl.46.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: NAACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-naacl.46.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100355722","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0001-8420-6011"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067856294","display_name":"Lan Lan","orcid":"https://orcid.org/0009-0005-1531-8753"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan Lan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102702850","display_name":"Jiahuan Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiahuan Cao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090623022","display_name":"Hiuyi Cheng","orcid":"https://orcid.org/0000-0003-2785-9184"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hiuyi Cheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101873754","display_name":"Kai Ding","orcid":"https://orcid.org/0000-0002-4214-1923"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kai Ding","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5080674767","display_name":"Lianwen Jin","orcid":"https://orcid.org/0000-0002-5456-0957"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lianwen Jin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100355722"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17377605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"779","last_page":"817"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.8877999782562256,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.8877999782562256,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6706954836845398},{"id":"https://openalex.org/keywords/poetry","display_name":"Poetry","score":0.6126376390457153},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5743194818496704},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5151346921920776},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.49892115592956543},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46610909700393677},{"id":"https://openalex.org/keywords/literature","display_name":"Literature","score":0.2494056522846222},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.15782302618026733},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07914161682128906},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.07176077365875244}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6706954836845398},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.6126376390457153},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5743194818496704},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5151346921920776},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.49892115592956543},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46610909700393677},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.2494056522846222},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.15782302618026733},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07914161682128906},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.07176077365875244}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-naacl.46","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-naacl.46","pdf_url":"https://aclanthology.org/2025.findings-naacl.46.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: NAACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-naacl.46","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-naacl.46","pdf_url":"https://aclanthology.org/2025.findings-naacl.46.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: NAACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6499999761581421,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1428435317","display_name":null,"funder_award_id":"Grant No.:","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2504063345","display_name":null,"funder_award_id":"6244160","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5270964542","display_name":null,"funder_award_id":"62441604","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6561198578","display_name":null,"funder_award_id":"62476093","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411113290.pdf","grobid_xml":"https://content.openalex.org/works/W4411113290.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2899084033","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W2748952813","https://openalex.org/W1531601525","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Ancient":[0],"Chinese":[1,8,179],"Poetry":[2],"(ACP),":[3],"a":[4,108,153,163],"critical":[5],"aspect":[6],"of":[7,19,39,71,119,124,135,157,174],"cultural":[9],"heritage,":[10],"presents":[11],"unique":[12],"challenges":[13,23],"for":[14,93],"Large":[15],"Language":[16],"Models":[17],"(LLMs).One":[18],"the":[20,25,52,67,76,90,105,117,122,172,185],"most":[21],"pressing":[22],"is":[24],"significant":[26],"hallucination":[27],"issues":[28],"faced":[29],"by":[30,162],"LLMs":[31,41,120,161,175],"due":[32],"to":[33,65,129,184],"data":[34],"scarcity":[35],"and":[36,60,69,89,146,187],"limited":[37],"ability":[38],"general":[40],"when":[42],"dealing":[43],"with":[44],"ACP.To":[45],"address":[46],"these":[47],"challenges,":[48],"this":[49,101,167,191],"paper":[50],"constructs":[51],"ACP-Corpus,":[53],"which":[54],"encompasses":[55],"1.1":[56],"million":[57,82],"ancient":[58,125,178],"poems":[59],"990K":[61],"related":[62],"texts,":[63],"designed":[64],"enhance":[66],"training":[68],"performance":[70,118],"LLMs.Alongside":[72],"this,":[73],"we":[74,103],"develop":[75],"ACP-QA":[77],"dataset,":[78],"comprising":[79],"over":[80],"12":[81],"question-answer":[83],"pairs":[84],"across":[85],"24":[86],"task":[87],"categories,":[88],"ACP-Eval":[91],"dataset":[92],"rigorous":[94],"evaluation":[95],"purposes,":[96],"containing":[97],"7,050":[98],"entries.Building":[99],"on":[100],"resources,":[102],"propose":[104],"ACP-RAG":[106,131,151],"framework,":[107],"specialized":[109],"Retrieval-Augmented":[110],"Generation":[111],"(RAG)":[112],"approach":[113],"that":[114,150],"significantly":[115],"improves":[116],"in":[121,176],"domain":[123],"poetry":[126,180],"from":[127],"49.2%":[128],"89.0%.The":[130],"contains":[132],"five":[133],"modules":[134],"semantic":[136,139],"coarse-grained":[137],"retrieval,":[138,141,143],"fine-grained":[140],"keyword":[142,144],"matching,":[145],"context":[147],"filtering.Experiments":[148],"show":[149],"achieves":[152],"promising":[154],"response":[155],"accuracy":[156],"89.0%,":[158],"surpassing":[159],"existing":[160],"remarkable":[164],"margin.We":[165],"believe":[166],"work":[168],"not":[169],"only":[170],"advances":[171],"capabilities":[173],"processing":[177],"but":[181],"also":[182],"contributes":[183],"preservation":[186],"innovative":[188],"development":[189],"within":[190],"rich":[192],"literary":[193],"tradition.":[194]},"counts_by_year":[],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
