{"id":"https://openalex.org/W7140098372","doi":"https://doi.org/10.18653/v1/2026.findings-eacl.294","title":"Tackling Distractor Documents in Multi-Hop QA with Reinforcement and Curriculum Learning","display_name":"Tackling Distractor Documents in Multi-Hop QA with Reinforcement and Curriculum Learning","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7140098372","doi":"https://doi.org/10.18653/v1/2026.findings-eacl.294"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2026.findings-eacl.294","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.findings-eacl.294","pdf_url":"https://aclanthology.org/2026.findings-eacl.294.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2026","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2026.findings-eacl.294.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130325046","display_name":"Jerry Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jerry Huang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130343230","display_name":"Siddarth Madala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Siddarth Madala","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034389677","display_name":"Risham Sidhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Risham Sidhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130394656","display_name":"Cheng Niu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng Niu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130324855","display_name":"Hao Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Peng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130380050","display_name":"Julia Hockenmaier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Julia Hockenmaier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130338599","display_name":"Tong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39501098,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5548","last_page":"5561"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.19760000705718994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.19760000705718994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10465","display_name":"Neurobiology of Language and Bilingualism","score":0.05400000140070915,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.04619999974966049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/curriculum","display_name":"Curriculum","score":0.4537000060081482},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.45179998874664307},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.35510000586509705},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.26910001039505005}],"concepts":[{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.4537000060081482},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.45179998874664307},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.42100000381469727},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.3944000005722046},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.38580000400543213},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2896000146865845},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.26649999618530273},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.25049999356269836}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/2026.findings-eacl.294","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.findings-eacl.294","pdf_url":"https://aclanthology.org/2026.findings-eacl.294.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2026","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2503.12759","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2503.12759","pdf_url":"https://arxiv.org/pdf/2503.12759","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.18653/v1/2026.findings-eacl.294","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.findings-eacl.294","pdf_url":"https://aclanthology.org/2026.findings-eacl.294.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2026","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7140098372.pdf","grobid_xml":"https://content.openalex.org/works/W7140098372.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Retrieval-augmented":[0],"generation":[1,14,33],"(RAG)":[2],"systems":[3],"rely":[4],"on":[5,130],"retrieval":[6,85],"models":[7,15,81,102,156],"for":[8,16,36,159],"identifying":[9,66],"relevant":[10,52,67],"contexts":[11],"and":[12,24,50,106,113,135,141,166],"answer":[13,32,74,140],"utilizing":[17],"those":[18],"contexts.However,":[19],"retrievers":[20],"exhibit":[21],"imperfect":[22],"recall":[23],"precision,":[25],"limiting":[26],"downstream":[27],"performance.We":[28],"introduce":[29],"RAG-RL,":[30],"an":[31],"model":[34,117,184],"trained":[35,83],"multi-hop":[37],"question":[38],"answering":[39],"(MHQA)":[40],"to":[41,48,72,103],"not":[42],"only":[43],"generate":[44],"answers":[45],"but":[46],"also":[47],"identify":[49],"cite":[51],"information":[53],"from":[54,69],"larger":[55],"sets":[56],"of":[57,62,65,90,123,170],"retrieved":[58],"contexts,":[59],"shifting":[60],"some":[61],"the":[63,70,73,121],"burden":[64],"documents":[68,100],"retriever":[71],"generator.Our":[75],"approach":[76],"uses":[77],"curriculum":[78],"learning,":[79],"where":[80],"are":[82],"across":[84],"settings":[86],"with":[87,97,109],"varying":[88],"levels":[89],"noise.Our":[91],"experiments":[92,145],"show":[93],"that":[94],"training":[95,152,173,178],"samples":[96,153],"fewer":[98],"distractor":[99],"enable":[101],"acquire":[104],"citation":[105,142,164],"reasoning":[107],"skills":[108,162],"greater":[110],"sample":[111,179],"efficiency":[112],"generalizability,":[114],"demonstrating":[115],"strong":[116],"performance":[118],"even":[119],"as":[120],"number":[122],"irrelevant":[124],"passages":[125],"increases.We":[126],"benchmark":[127],"our":[128,144],"methods":[129],"three":[131],"open-domain":[132],"MHQA":[133],"datasets":[134],"report":[136],"significant":[137],"gains":[138],"in":[139],"accuracy.Furthermore,":[143],"provide":[146],"empirical":[147],"insights":[148],"into":[149],"how":[150,167],"simpler":[151],"can":[154],"give":[155],"stronger":[157],"signals":[158],"learning":[160],"specific":[161],"(e.g.,":[163,172],"generation)":[165],"different":[168],"components":[169],"posttraining":[171],"set":[174],"construction,":[175],"rulebased":[176],"rewards,":[177],"ordering,":[180],"etc.)":[181],"impact":[182],"final":[183],"performance.":[185],"1":[186]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-24T00:00:00"}
