{"id":"https://openalex.org/W4402111261","doi":"https://doi.org/10.21437/interspeech.2024-2467","title":"LiteFocus: Accelerated Diffusion Inference for Long Audio Synthesis","display_name":"LiteFocus: Accelerated Diffusion Inference for Long Audio Synthesis","publication_year":2024,"publication_date":"2024-09-01","ids":{"openalex":"https://openalex.org/W4402111261","doi":"https://doi.org/10.21437/interspeech.2024-2467"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2024-2467","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2024-2467","pdf_url":"https://www.isca-archive.org/interspeech_2024/tan24c_interspeech.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.isca-archive.org/interspeech_2024/tan24c_interspeech.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101336174","display_name":"Zhenxiong Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhenxiong Tan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109748042","display_name":"Xinyin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinyin Ma","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114001682","display_name":"Gongfan Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gongfan Fang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5015574447","display_name":"Xinchao Wang","orcid":"https://orcid.org/0000-0003-0057-1404"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinchao Wang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101336174"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1521027,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4878","last_page":"4882"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7215622067451477},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.634830117225647},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.6230210065841675},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38242676854133606},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.27873778343200684}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7215622067451477},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.634830117225647},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.6230210065841675},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38242676854133606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27873778343200684},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2024-2467","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2024-2467","pdf_url":"https://www.isca-archive.org/interspeech_2024/tan24c_interspeech.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2024","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.21437/interspeech.2024-2467","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2024-2467","pdf_url":"https://www.isca-archive.org/interspeech_2024/tan24c_interspeech.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2024","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402111261.pdf","grobid_xml":"https://content.openalex.org/works/W4402111261.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Latent":[0],"diffusion":[1,72],"models":[2,73],"have":[3],"shown":[4],"promising":[5],"results":[6],"in":[7,74,81,126],"audio":[8,22,30,51,70,76,109,129,135],"generation,":[9],"making":[10],"notable":[11],"advancements":[12],"over":[13],"traditional":[14],"methods.However,":[15],"their":[16],"performance,":[17],"while":[18,107,131],"impressive":[19],"with":[20,120],"short":[21],"clips,":[23,44],"faces":[24],"challenges":[25,32],"when":[26],"extended":[27],"to":[28,35,49,55],"longer":[29,50],"sequences.These":[31],"are":[33],"due":[34],"model's":[36],"selfattention":[37],"mechanism":[38],"and":[39,96],"training":[40],"predominantly":[41],"on":[42,117],"10-second":[43],"which":[45,99],"complicates":[46],"the":[47,66,78,101],"extension":[48],"without":[52],"adaptation.In":[53],"response":[54],"these":[56],"issues,":[57],"we":[58,83],"introduce":[59],"a":[60,85],"novel":[61],"approach,":[62],"LiteFocus":[63],"that":[64],"enhances":[65],"inference":[67,118],"of":[68],"existing":[69],"latent":[71],"long":[75],"synthesis.Observed":[77],"attention":[79,90,102],"pattern":[80],"self-attention,":[82],"employ":[84],"dual":[86],"sparse":[87],"form":[88],"for":[89],"calculation,":[91],"designated":[92],"as":[93],"same-frequency":[94,105],"focus":[95],"cross-frequency":[97,112],"compensation,":[98],"curtails":[100],"computation":[103],"under":[104],"constraints,":[106],"enhancing":[108],"quality":[110],"through":[111],"refillment.LiteFocus":[113],"demonstrates":[114],"substantial":[115],"reduction":[116],"time":[119],"diffusion-based":[121],"TTA":[122],"model":[123],"by":[124],"1.99":[125],"synthesizing":[127],"80second":[128],"clips":[130],"also":[132],"obtaining":[133],"improved":[134],"quality.":[136]},"counts_by_year":[],"updated_date":"2026-03-14T06:41:57.775601","created_date":"2025-10-10T00:00:00"}
