{"id":"https://openalex.org/W7138024237","doi":"https://doi.org/10.1609/aaai.v40i31.39885","title":"One-Step Generative Policies with Q-Learning: A Reformulation of MeanFlow","display_name":"One-Step Generative Policies with Q-Learning: A Reformulation of MeanFlow","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138024237","doi":"https://doi.org/10.1609/aaai.v40i31.39885"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i31.39885","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39885","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i31.39885","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129719347","display_name":"Zeyuan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zeyuan Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129727340","display_name":"Da Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Da Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129742958","display_name":"Yulin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yulin Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129705748","display_name":"Ye Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye Shi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129647863","display_name":"Liang Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang Bai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107943977","display_name":"Tianyuan Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tianyuan Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129664351","display_name":"Yanwei Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanwei Fu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129719347"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.24612092,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"31","first_page":"26751","last_page":"26759"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6183000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6183000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.11569999903440475,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.05270000174641609,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.769599974155426},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.7129999995231628},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.5688999891281128},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5357000231742859},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.519599974155426},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.46790000796318054},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.4440000057220459}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.769599974155426},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.7129999995231628},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7078999876976013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6133999824523926},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.5688999891281128},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5357000231742859},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.519599974155426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5095999836921692},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.46790000796318054},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.4440000057220459},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4018000066280365},{"id":"https://openalex.org/C92811239","wikidata":"https://www.wikidata.org/wiki/Q20998670","display_name":"Expressivity","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i31.39885","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39885","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i31.39885","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39885","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5351591110229492}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,92],"introduce":[1],"a":[2,17,82,138],"one-step":[3,28,118],"generative":[4],"policy":[5,84,108,133],"for":[6,88],"offline":[7,162],"reinforcement":[8,165],"learning":[9,134,166],"that":[10,103,154],"maps":[11],"*noise*":[12],"directly":[13],"to":[14,36,65,68],"*actions*":[15],"via":[16,135],"*residual":[18,101],"reformulation*":[19],"of":[20,124],"MeanFlow,":[21],"making":[22],"it":[23],"compatible":[24],"with":[25,57],"Q-learning.":[26,58],"While":[27],"Gaussian":[29],"policies":[30],"enable":[31,69],"fast":[32],"inference,":[33],"they":[34],"struggle":[35],"capture":[37],"complex,":[38],"multimodal":[39,125],"action":[40,126],"distributions.":[41],"Existing":[42],"flow-based":[43],"methods":[44],"improve":[45],"expressivity":[46],"but":[47],"typically":[48],"rely":[49],"on":[50,144],"distillation":[51],"and":[52,78,97,106,128,131,150,163],"two-stage":[53],"training":[54,140],"when":[55],"trained":[56],"To":[59],"overcome":[60],"these":[61],"limitations,":[62],"we":[63],"propose":[64],"reformulate":[66],"MeanFlow":[67],"*direct":[70],"noise-to-action":[71,79,119],"generation*":[72],"by":[73],"integrating":[74],"the":[75,86,148],"velocity":[76,90],"field":[77],"transformation":[80],"into":[81],"single":[83],"network\u2014eliminating":[85],"need":[87],"separate":[89],"estimation.":[91],"explore":[93],"several":[94],"reformulation":[95],"variants":[96],"identify":[98],"an":[99],"effective":[100],"formulation*":[102],"supports":[104],"expressive":[105,122],"stable":[107,132],"learning.":[109],"Our":[110],"method":[111,156],"offers":[112],"three":[113],"key":[114],"advantages:":[115],"1)":[116],"efficient":[117,130],"generation,":[120],"2)":[121],"modelling":[123],"distributions,":[127],"3)":[129],"Q-learning":[136],"in":[137,160],"single-stage":[139],"setup.":[141],"Extensive":[142],"experiments":[143],"73":[145],"tasks":[146],"across":[147],"OGBench":[149],"D4RL":[151],"benchmarks":[152],"demonstrate":[153],"our":[155],"achieves":[157],"strong":[158],"performance":[159],"both":[161],"offline-to-online":[164],"settings.":[167]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
