{"id":"https://openalex.org/W4392903391","doi":"https://doi.org/10.1109/icassp48485.2024.10447380","title":"T-Foley: A Controllable Waveform-Domain Diffusion Model for Temporal-Event-Guided Foley Sound Synthesis","display_name":"T-Foley: A Controllable Waveform-Domain Diffusion Model for Temporal-Event-Guided Foley Sound Synthesis","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903391","doi":"https://doi.org/10.1109/icassp48485.2024.10447380"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447380","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447380","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012340838","display_name":"Yoonjin Chung","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]},{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["CA","KR"],"is_corresponding":true,"raw_author_name":"Yoonjin Chung","raw_affiliation_strings":["KAIST,Graduate School of Artificial Intelligence,Republic of Korea","Graduate School of Artificial Intelligence, KAIST, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,Graduate School of Artificial Intelligence,Republic of Korea","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Graduate School of Artificial Intelligence, KAIST, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101807412","display_name":"Jun-Won Lee","orcid":"https://orcid.org/0000-0003-2729-9113"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]},{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Junwon Lee","raw_affiliation_strings":["KAIST,Graduate School of Artificial Intelligence,Republic of Korea","Graduate School of Artificial Intelligence, KAIST, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,Graduate School of Artificial Intelligence,Republic of Korea","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Graduate School of Artificial Intelligence, KAIST, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056437111","display_name":"Juhan Nam","orcid":"https://orcid.org/0000-0003-2664-2119"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]},{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Juhan Nam","raw_affiliation_strings":["KAIST,Graduate School of Artificial Intelligence,Republic of Korea","Graduate School of Artificial Intelligence, KAIST, Republic of Korea","Graduate School of Culture Technology, KAIST, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"KAIST,Graduate School of Artificial Intelligence,Republic of Korea","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Graduate School of Artificial Intelligence, KAIST, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Graduate School of Culture Technology, KAIST, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5012340838"],"corresponding_institution_ids":["https://openalex.org/I157485424","https://openalex.org/I4210164862"],"apc_list":null,"apc_paid":null,"fwci":4.7063,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.95477599,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6820","last_page":"6824"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/foley","display_name":"Foley","score":0.8446786999702454},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7887871265411377},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5812571048736572},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.47545310854911804},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.43505775928497314},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3842620849609375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32379215955734253},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.15405717492103577}],"concepts":[{"id":"https://openalex.org/C110446960","wikidata":"https://www.wikidata.org/wiki/Q762316","display_name":"Foley","level":2,"score":0.8446786999702454},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7887871265411377},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5812571048736572},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.47545310854911804},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.43505775928497314},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3842620849609375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32379215955734253},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.15405717492103577},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447380","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447380","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","score":0.5,"display_name":"Climate action"}],"awards":[{"id":"https://openalex.org/G1769687045","display_name":null,"funder_award_id":"2019-0-00075","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G2148870006","display_name":null,"funder_award_id":"2019-0-00075","funder_id":"https://openalex.org/F4320324161","funder_display_name":"Korea Advanced Institute of Science and Technology"},{"id":"https://openalex.org/G3034753964","display_name":null,"funder_award_id":"grant","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G342704958","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G398188885","display_name":null,"funder_award_id":"2019-0-0007","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G4628738581","display_name":null,"funder_award_id":"No. 2019-0-00075","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G515828364","display_name":null,"funder_award_id":"No. RS-","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G5415505637","display_name":null,"funder_award_id":"2019-0-00075","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G626505518","display_name":null,"funder_award_id":"No. 201","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G7064264939","display_name":null,"funder_award_id":"2019-0-00075","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G904912805","display_name":null,"funder_award_id":"RS-2023-00222383","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320324161","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2167700723","https://openalex.org/W2526050071","https://openalex.org/W2760103357","https://openalex.org/W2972745527","https://openalex.org/W2995348821","https://openalex.org/W3007605881","https://openalex.org/W3046890131","https://openalex.org/W3094550259","https://openalex.org/W3105017218","https://openalex.org/W3177150392","https://openalex.org/W3187009280","https://openalex.org/W3207785894","https://openalex.org/W3214281017","https://openalex.org/W4225856875","https://openalex.org/W4286901121","https://openalex.org/W4367359628","https://openalex.org/W4372263438","https://openalex.org/W4375868912","https://openalex.org/W4386071828","https://openalex.org/W6767367760","https://openalex.org/W6782757012","https://openalex.org/W6782760101","https://openalex.org/W6796762324","https://openalex.org/W6797906067","https://openalex.org/W6804172200","https://openalex.org/W6840815571","https://openalex.org/W6845479124","https://openalex.org/W6849109464","https://openalex.org/W6852019497"],"related_works":["https://openalex.org/W2523036031","https://openalex.org/W4237383858","https://openalex.org/W3211743321","https://openalex.org/W2016968289","https://openalex.org/W2021759681","https://openalex.org/W2011071567","https://openalex.org/W4256291417","https://openalex.org/W2059053411","https://openalex.org/W4248667156","https://openalex.org/W2006304570"],"abstract_inverted_index":{"Foley":[0,26,63,74,120],"sound,":[1],"audio":[2,80],"content":[3],"inserted":[4],"synchronously":[5],"with":[6,123],"videos,":[7],"plays":[8],"a":[9,43,48,68,96,101],"critical":[10],"role":[11],"in":[12,25,32,58,111,134],"the":[13,30,59,84,124,145],"user":[14],"experience":[15],"of":[16,62],"multimedia":[17],"content.":[18],"Recently,":[19],"there":[20],"has":[21],"been":[22],"active":[23],"research":[24],"sound":[27,45,50,75,85,121],"synthesis,":[28],"leveraging":[29],"advancements":[31],"deep":[33],"generative":[34],"models.":[35],"However,":[36],"such":[37],"works":[38],"mainly":[39],"focus":[40],"on":[41,147],"replicating":[42],"single":[44],"class":[46,86],"or":[47],"textual":[49],"description,":[51],"neglecting":[52],"temporal":[53,88,92,97,125,140],"information,":[54],"which":[55],"is":[56],"crucial":[57],"practical":[60,131],"applications":[61],"sound.":[64],"We":[65,143],"present":[66],"T-Foley,":[67],"Temporal-event-guided":[69],"waveform":[70],"generation":[71],"model":[72],"for":[73,139],"synthesis.":[76],"T-Foley":[77,107],"generates":[78,119],"high-quality":[79],"using":[81],"two":[82],"conditions:":[83],"and":[87,100,114,118],"event":[89,98,141],"feature.":[90],"For":[91],"conditioning,":[93],"we":[94,128],"devise":[95],"feature":[99],"novel":[102],"conditioning":[103],"technique":[104],"named":[105],"Block-FiLM.":[106],"achieves":[108],"superior":[109],"performance":[110],"both":[112],"objective":[113],"subjective":[115],"evaluation":[116],"metrics":[117],"well-synchronized":[122],"events.":[126],"Additionally,":[127],"showcase":[129],"T-Foley\u2019s":[130],"applications,":[132],"particularly":[133],"scenarios":[135],"involving":[136],"vocal":[137],"mimicry":[138],"control.":[142],"show":[144],"demo":[146],"our":[148],"companion":[149],"website.":[150],"<sup":[151],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[152],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[153]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11}],"updated_date":"2026-03-16T09:10:04.655348","created_date":"2025-10-10T00:00:00"}
