{"id":"https://openalex.org/W4406461857","doi":"https://doi.org/10.1109/slt61566.2024.10832165","title":"Data Efficient Reflow for Few Step Audio Generation","display_name":"Data Efficient Reflow for Few Step Audio Generation","publication_year":2024,"publication_date":"2024-12-02","ids":{"openalex":"https://openalex.org/W4406461857","doi":"https://doi.org/10.1109/slt61566.2024.10832165"},"language":"en","primary_location":{"id":"doi:10.1109/slt61566.2024.10832165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087616209","display_name":"Lemeng Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lemeng Wu","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031088292","display_name":"Zhaoheng Ni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhaoheng Ni","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100672423","display_name":"Bowen Shi","orcid":"https://orcid.org/0000-0002-0689-9964"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bowen Shi","raw_affiliation_strings":["Meta FAIR,USA"],"affiliations":[{"raw_affiliation_string":"Meta FAIR,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012026156","display_name":"Ga\u00ebl Le Lan","orcid":"https://orcid.org/0000-0002-1493-5777"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gael Le Lan","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080751032","display_name":"Anurag Kumar","orcid":"https://orcid.org/0000-0002-1164-144X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anurag Kumar","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034208747","display_name":"Varun Nagaraja","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Varun Nagaraja","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070892237","display_name":"Xinhao Mei","orcid":"https://orcid.org/0000-0001-6079-5130"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinhao Mei","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100575188","display_name":"Yunyang Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunyang Xiong","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015226839","display_name":"Bilge Soran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bilge Soran","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039488725","display_name":"Raghuraman Krishnamoorthi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raghuraman Krishnamoorthi","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051950818","display_name":"Wei-Ning Hsu","orcid":"https://orcid.org/0000-0001-5546-5217"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei-Ning Hsu","raw_affiliation_strings":["Meta FAIR,USA"],"affiliations":[{"raw_affiliation_string":"Meta FAIR,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115596237","display_name":"Yangyang Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yangyang Shi","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016704219","display_name":"Vikas Chandra","orcid":"https://orcid.org/0009-0005-4996-8455"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vikas Chandra","raw_affiliation_strings":["Meta,USA"],"affiliations":[{"raw_affiliation_string":"Meta,USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5087616209"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29473177,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"455","last_page":"461"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.666294515132904},{"id":"https://openalex.org/keywords/digital-audio-broadcasting","display_name":"Digital audio broadcasting","score":0.44539526104927063},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11734867095947266}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.666294515132904},{"id":"https://openalex.org/C2779106878","wikidata":"https://www.wikidata.org/wiki/Q1257510","display_name":"Digital audio broadcasting","level":2,"score":0.44539526104927063},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11734867095947266}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt61566.2024.10832165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt61566.2024.10832165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.6600000262260437,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W3215615641","https://openalex.org/W4206662443","https://openalex.org/W4221159371","https://openalex.org/W4252812408","https://openalex.org/W4281661987","https://openalex.org/W4288089799","https://openalex.org/W4288099666","https://openalex.org/W4297676498","https://openalex.org/W4303647933","https://openalex.org/W4307323391","https://openalex.org/W4308167501","https://openalex.org/W4312933868","https://openalex.org/W4372260310","https://openalex.org/W4380551955","https://openalex.org/W4381786045","https://openalex.org/W4386065690","https://openalex.org/W4386722101","https://openalex.org/W4387389604","https://openalex.org/W4388093137","https://openalex.org/W4390306858","https://openalex.org/W4391709184","https://openalex.org/W4392904491","https://openalex.org/W4394656839","https://openalex.org/W4402753769","https://openalex.org/W6757220786","https://openalex.org/W6769627184","https://openalex.org/W6809884996","https://openalex.org/W6810595431","https://openalex.org/W6838327568","https://openalex.org/W6840815571","https://openalex.org/W6843731886","https://openalex.org/W6845479124","https://openalex.org/W6846539466","https://openalex.org/W6846827642","https://openalex.org/W6846849257","https://openalex.org/W6849109464","https://openalex.org/W6853096648","https://openalex.org/W6853515095","https://openalex.org/W6856548676","https://openalex.org/W6857588802","https://openalex.org/W6857842755","https://openalex.org/W6859583170","https://openalex.org/W6861353174","https://openalex.org/W6864565647"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Flow":[0],"matching":[1,135],"has":[2],"been":[3],"successfully":[4],"applied":[5],"onto":[6],"generative":[7],"models,":[8],"particularly":[9],"in":[10,25],"producing":[11],"high-quality":[12],"images":[13],"and":[14,97,128,153],"audio.":[15],"However,":[16],"the":[17,22,46,55,80,100,124,133,158,170,175,180],"iterative":[18],"sampling":[19,58],"required":[20],"for":[21,70],"ODE":[23,47],"solver":[24],"flow":[26,82,134],"matching-based":[27,83],"approaches":[28,69],"can":[29],"be":[30],"time-consuming.":[31],"Reflow":[32],"finetune,":[33],"a":[34,41,50,115,138],"technique":[35],"derived":[36],"from":[37],"Rectified":[38],"flow,":[39],"offers":[40],"promising":[42],"solution":[43],"by":[44],"transforming":[45],"trajectory":[48,129],"into":[49],"straight":[51],"one,":[52],"thereby":[53],"reducing":[54],"number":[56],"of":[57,102,140,160],"steps.":[59],"In":[60],"this":[61,111,141],"paper,":[62],"we":[63,113],"focus":[64],"on":[65,179],"developing":[66],"data-efficient":[67,117],"flow-based":[68],"text-to-audio":[71,181],"generation.":[72],"We":[73],"found":[74],"that":[75,169],"directly":[76],"applying":[77],"reflow":[78,118,125,177],"to":[79,105,130,151,164],"pre-trained":[81],"audio":[84],"generation":[85,182],"models":[86],"is":[87],"typically":[88],"computationally":[89],"expensive.":[90],"It":[91],"requires":[92,145],"over":[93],"50,000":[94],"training":[95,103,161],"iterations":[96],"five":[98],"times":[99,157],"amount":[101],"data":[104,126,154,162],"achieve":[106],"satisfactory":[107],"results.":[108],"To":[109],"address":[110],"issue,":[112],"introduce":[114],"novel":[116],"(DEreflow)":[119],"method.":[120],"This":[121],"method":[122,178],"modifies":[123],"pairs":[127,155],"align":[131],"with":[132],"distribution.":[136],"As":[137],"result":[139],"alignment,":[142],"our":[143],"approach":[144],"significantly":[146],"fewer":[147],"steps":[148],"(8,000":[149],"compared":[150,163],"50,000)":[152],"$(0.5$":[156],"scale":[159],"5":[165],"times).":[166],"Results":[167],"show":[168],"proposed":[171],"DEreflow":[172],"consistently":[173],"outperforms":[174],"original":[176],"task.":[183]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
