{"id":"https://openalex.org/W7127656514","doi":"https://doi.org/10.48550/arxiv.2602.03619","title":"Learning Query-Specific Rubrics from Human Preferences for DeepResearch Report Generation","display_name":"Learning Query-Specific Rubrics from Human Preferences for DeepResearch Report Generation","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7127656514","doi":"https://doi.org/10.48550/arxiv.2602.03619"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.03619","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03619","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.03619","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113112983","display_name":"Changze Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lv, Changze","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125043142","display_name":"Jie Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125050506","display_name":"Wentao Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Wentao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124994628","display_name":"Jingwen Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jingwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122369602","display_name":"Zisu Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Zisu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081624037","display_name":"Tian Miao","orcid":"https://orcid.org/0000-0002-5888-2064"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Muzhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125075529","display_name":"Shihan Dou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Shihan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125038725","display_name":"Tao Gui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gui, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113185192","display_name":"Le Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Le","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124977213","display_name":"Xiao Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125000293","display_name":"Xiaoqing Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Xiaoqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125084931","display_name":"Xuanjing Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Xuanjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125043142","display_name":"Jie Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jie","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5113112983"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3043999969959259,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3043999969959259,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.2854999899864197,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.051100000739097595,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.9779999852180481},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6703000068664551},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6363999843597412},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5990999937057495},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4438000023365021},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.39320001006126404}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.9779999852180481},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7071999907493591},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6703000068664551},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6363999843597412},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5990999937057495},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4530999958515167},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4438000023365021},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4016999900341034},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.39320001006126404},{"id":"https://openalex.org/C519536355","wikidata":"https://www.wikidata.org/wiki/Q21021151","display_name":"Repurposing","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.3082999885082245},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2969000041484833},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2554999887943268},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.03619","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03619","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.03619","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03619","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7158334851264954,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Nowadays,":[0],"training":[1,145],"and":[2,47,84,99,130,163],"evaluating":[3],"DeepResearch-generated":[4],"reports":[5],"remain":[6],"challenging":[7],"due":[8],"to":[9,49,58,167],"the":[10,143,160],"lack":[11,34],"of":[12,74,169],"verifiable":[13],"reward":[14,94],"signals.":[15],"Accordingly,":[16],"rubric-based":[17],"evaluation":[18],"has":[19],"become":[20],"a":[21,56,72,92,111],"common":[22],"practice.":[23],"However,":[24],"existing":[25,135],"approaches":[26],"either":[27],"rely":[28],"on":[29,39,159],"coarse,":[30],"pre-defined":[31],"rubrics":[32,43],"that":[33,44,122,168],"sufficient":[35],"granularity,":[36],"or":[37],"depend":[38],"manually":[40],"constructed":[41],"query-specific":[42,61],"are":[45],"costly":[46],"difficult":[48],"scale.":[50],"In":[51],"this":[52],"paper,":[53],"we":[54,108],"propose":[55],"pipeline":[57],"train":[59,85],"human-preference-aligned":[60],"rubric":[62,86,101,125,136,152],"generators":[63,87,126,153],"tailored":[64],"for":[65,116],"DeepResearch":[66,147,161],"report":[67,117],"generation.":[68,118],"We":[69,119],"first":[70],"construct":[71],"dataset":[73],"DeepResearch-style":[75],"queries":[76],"annotated":[77],"with":[78,91,150],"human":[79,96],"preferences":[80],"over":[81],"paired":[82],"reports,":[83],"via":[88],"reinforcement":[89],"learning":[90],"hybrid":[93],"combining":[95],"preference":[97],"supervision":[98,133],"LLM-based":[100],"evaluation.":[102],"To":[103],"better":[104,131],"handle":[105],"long-horizon":[106],"reasoning,":[107],"further":[109],"introduce":[110],"Multi-agent":[112],"Markov-state":[113],"(MaMs)":[114],"workflow":[115],"empirically":[120],"show":[121],"our":[123,151],"proposed":[124],"deliver":[127],"more":[128],"discriminative":[129],"human-aligned":[132],"than":[134],"design":[137],"strategies.":[138],"Moreover,":[139],"when":[140],"integrated":[141],"into":[142],"MaMs":[144],"framework,":[146],"systems":[148],"equipped":[149],"consistently":[154],"outperform":[155],"all":[156],"open-source":[157],"baselines":[158],"Bench":[162],"achieve":[164],"performance":[165],"comparable":[166],"leading":[170],"closed-source":[171],"models.":[172]},"counts_by_year":[],"updated_date":"2026-02-06T02:05:47.483045","created_date":"2026-02-06T00:00:00"}
