{"id":"https://openalex.org/W7152745826","doi":"https://doi.org/10.48550/arxiv.2604.06491","title":"Discrete Flow Matching Policy Optimization","display_name":"Discrete Flow Matching Policy Optimization","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7152745826","doi":"https://doi.org/10.48550/arxiv.2604.06491"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06491","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06491","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06491","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102684380","display_name":"Maojiang Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Su, Maojiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133256037","display_name":"Po-Chung Hsieh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hsieh, Po-Chung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133274611","display_name":"Weimin Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Weimin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101942165","display_name":"Ming Lu","orcid":"https://orcid.org/0000-0003-0517-7932"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Mingcheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133302147","display_name":"Jiunhau Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiunhau","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085696228","display_name":"Jerry Yao-Chieh Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Jerry Yao-Chieh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133301271","display_name":"Han Liu","orcid":"https://orcid.org/0009-0008-6850-9811"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Han","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102684380"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7491000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7491000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.04010000079870224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.01549999974668026,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.532800018787384},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.5291000008583069},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5152999758720398},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5113000273704529},{"id":"https://openalex.org/keywords/discretization","display_name":"Discretization","score":0.39989998936653137},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.39100000262260437},{"id":"https://openalex.org/keywords/design-for-manufacturability","display_name":"Design for manufacturability","score":0.3896999955177307},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.37070000171661377},{"id":"https://openalex.org/keywords/discrete-time-and-continuous-time","display_name":"Discrete time and continuous time","score":0.37059998512268066},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.3652999997138977}],"concepts":[{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5357999801635742},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.532800018787384},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.5291000008583069},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5152999758720398},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5113000273704529},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4973999857902527},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.44110000133514404},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.39989998936653137},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.39149999618530273},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.39100000262260437},{"id":"https://openalex.org/C62064638","wikidata":"https://www.wikidata.org/wiki/Q553878","display_name":"Design for manufacturability","level":2,"score":0.3896999955177307},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.37070000171661377},{"id":"https://openalex.org/C55689738","wikidata":"https://www.wikidata.org/wiki/Q15963867","display_name":"Discrete time and continuous time","level":2,"score":0.37059998512268066},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.3652999997138977},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.33059999346733093},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C152361515","wikidata":"https://www.wikidata.org/wiki/Q181328","display_name":"Bernoulli's principle","level":2,"score":0.30970001220703125},{"id":"https://openalex.org/C145671259","wikidata":"https://www.wikidata.org/wiki/Q1493786","display_name":"Discrete optimization","level":3,"score":0.30959999561309814},{"id":"https://openalex.org/C2776416436","wikidata":"https://www.wikidata.org/wiki/Q3751781","display_name":"Domino","level":3,"score":0.30250000953674316},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C38349280","wikidata":"https://www.wikidata.org/wiki/Q1434290","display_name":"Flow (mathematics)","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.29649999737739563},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.2623000144958496},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C41054675","wikidata":"https://www.wikidata.org/wiki/Q185547","display_name":"Binomial distribution","level":2,"score":0.2531999945640564},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.2531999945640564},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.2531000077724457},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06491","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06491","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06491","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06491","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7872112393379211,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"introduce":[1,93],"Discrete":[2,16],"flow":[3],"Matching":[4,18],"policy":[5,26,89],"Optimization":[6],"(DoMinO),":[7],"a":[8,22,40,48,58],"unified":[9],"framework":[10,173],"for":[11,123,174],"Reinforcement":[12],"Learning":[13],"(RL)":[14],"fine-tuning":[15,54,85],"Flow":[17],"(DFM)":[19],"models":[20],"under":[21],"broad":[23],"class":[24],"of":[25,53,117],"gradient":[27],"methods.":[28,86],"Our":[29],"key":[30],"idea":[31],"is":[32],"to":[33,97,103],"view":[34],"the":[35,67,99,104,114,124,146,157],"DFM":[36,69],"sampling":[37],"procedure":[38],"as":[39,57,170],"multi-step":[41],"Markov":[42],"Decision":[43],"Process.":[44],"This":[45],"perspective":[46],"provides":[47],"simple":[49],"and":[50,77,119,141],"transparent":[51],"reformulation":[52],"reward":[55],"maximization":[56],"robust":[59],"RL":[60,84],"objective.":[61],"Consequently,":[62],"it":[63],"not":[64],"only":[65],"preserves":[66],"original":[68],"samplers":[70],"but":[71],"also":[72,92],"avoids":[73],"biased":[74],"auxiliary":[75],"estimators":[76],"likelihood":[78],"surrogates":[79],"used":[80],"by":[81],"many":[82],"prior":[83],"To":[87],"prevent":[88],"collapse,":[90],"we":[91,108,127],"new":[94],"total-variation":[95],"regularizers":[96],"keep":[98],"fine-tuned":[100],"distribution":[101,160],"close":[102],"pretrained":[105],"one.":[106],"Theoretically,":[107],"establish":[109,168],"an":[110,171],"upper":[111,121],"bound":[112],"on":[113,130],"discretization":[115],"error":[116],"DoMinO":[118,129,135,169],"tractable":[120],"bounds":[122],"regularizers.":[125],"Experimentally,":[126],"evaluate":[128],"regulatory":[131],"DNA":[132],"sequence":[133,143,159,177],"design.":[134],"achieves":[136],"stronger":[137],"predicted":[138],"enhancer":[139],"activity":[140],"better":[142],"naturalness":[144],"than":[145],"previous":[147],"best":[148],"reward-driven":[149],"baselines.":[150],"The":[151],"regularization":[152],"further":[153],"improves":[154],"alignment":[155],"with":[156],"natural":[158],"while":[161],"preserving":[162],"strong":[163],"functional":[164],"performance.":[165],"These":[166],"results":[167],"useful":[172],"controllable":[175],"discrete":[176],"generation.":[178]},"counts_by_year":[],"updated_date":"2026-04-10T06:07:51.998497","created_date":"2026-04-10T00:00:00"}
