{"id":"https://openalex.org/W7155034729","doi":"https://doi.org/10.48550/arxiv.2604.17325","title":"Align Documents to Questions: Question-Oriented Document Rewriting for Retrieval-Augmented Generation","display_name":"Align Documents to Questions: Question-Oriented Document Rewriting for Retrieval-Augmented Generation","publication_year":2026,"publication_date":"2026-04-19","ids":{"openalex":"https://openalex.org/W7155034729","doi":"https://doi.org/10.48550/arxiv.2604.17325"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.17325","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17325","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.17325","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134102406","display_name":"Jiaang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jiaang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134125104","display_name":"Zhendong Mao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Zhendong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134175876","display_name":"Quan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Quan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114198413","display_name":"Yuning Wan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wan, Yuning","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134177429","display_name":"Yongdong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yongdong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9035000205039978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9035000205039978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.03610000014305115,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.007199999876320362,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rewriting","display_name":"Rewriting","score":0.7943999767303467},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7627999782562256},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6008999943733215},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.508400022983551},{"id":"https://openalex.org/keywords/readability","display_name":"Readability","score":0.428600013256073},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.3926999866962433},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.37959998846054077}],"concepts":[{"id":"https://openalex.org/C154690210","wikidata":"https://www.wikidata.org/wiki/Q1668499","display_name":"Rewriting","level":2,"score":0.7943999767303467},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7889000177383423},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7627999782562256},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6008999943733215},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.508400022983551},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44909998774528503},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.428600013256073},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39899998903274536},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3718000054359436},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36970001459121704},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.36390000581741333},{"id":"https://openalex.org/C62989814","wikidata":"https://www.wikidata.org/wiki/Q854648","display_name":"Gossip","level":2,"score":0.3068999946117401},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3012999892234802},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C93361087","wikidata":"https://www.wikidata.org/wiki/Q4426698","display_name":"Data consistency","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2615000009536743}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.17325","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17325","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.17325","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17325","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8555520176887512,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Retrieval-Augmented":[0],"Generation":[1],"(RAG)":[2],"enhances":[3,146],"the":[4,47],"factuality":[5],"of":[6,49,87],"Large":[7],"Language":[8],"Models":[9],"(LLMs)":[10],"by":[11,54],"incorporating":[12],"retrieved":[13,41,50,69],"documents":[14,70],"and/or":[15],"generated":[16,34],"context.":[17],"However,":[18],"LLMs":[19],"often":[20],"exhibit":[21],"a":[22,64,72,104,138],"stylistic":[23,94],"bias":[24],"when":[25],"presented":[26],"with":[27,71,156,164],"mixed":[28],"contexts,":[29],"favoring":[30],"fluent":[31],"but":[32],"hallucinated":[33],"content":[35],"over":[36],"factually":[37],"grounded":[38],"yet":[39],"disorganized":[40],"evidence.":[42],"This":[43],"phenomenon":[44],"reveals":[45],"that":[46,67,143],"utility":[48],"information":[51],"is":[52],"bottlenecked":[53],"its":[55],"presentation.":[56],"To":[57],"bridge":[58],"this":[59],"gap,":[60],"we":[61],"propose":[62],"QREAM,":[63],"style-controlled":[65],"rewriter":[66],"aligns":[68],"question-oriented":[73],"style":[74],"while":[75],"preserving":[76],"facts,":[77],"better":[78],"for":[79],"LLM":[80],"readers":[81],"to":[82,96,126,152],"utilize.":[83],"Our":[84],"framework":[85],"consists":[86],"two":[88],"stages:":[89],"(1)":[90],"QREAM-ICL,":[91],"which":[92],"uses":[93],"seeds":[95],"guide":[97],"iterative":[98],"rewriting":[99],"exploration;":[100],"and":[101,123],"(2)":[102],"QREAM-FT,":[103],"lightweight":[105],"student":[106],"model":[107],"distilled":[108],"from":[109],"denoised":[110],"ICL":[111],"outputs.":[112],"QREAM-FT":[113],"employs":[114],"dual-criteria":[115],"rejection":[116],"sampling,":[117],"filtering":[118],"based":[119],"on":[120],"answer":[121],"correctness":[122],"factual":[124,165],"consistency":[125],"ensure":[127],"high-quality":[128],"supervision.":[129],"QREAM":[130,144],"seamlessly":[131],"integrates":[132],"into":[133],"existing":[134],"RAG":[135,148],"pipelines":[136],"as":[137],"plug-and-play":[139],"module.":[140],"Experiments":[141],"demonstrate":[142],"consistently":[145],"advanced":[147],"pipelines,":[149],"yielding":[150],"up":[151],"8%":[153],"relative":[154],"improvement":[155],"negligible":[157],"latency":[158],"overhead,":[159],"effectively":[160],"balancing":[161],"question":[162],"relevance":[163],"grounding.":[166]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
