{"id":"https://openalex.org/W4414359624","doi":"https://doi.org/10.24963/ijcai.2025/903","title":"BridgeVoC: Neural Vocoder with Schr\u00f6dinger Bridge","display_name":"BridgeVoC: Neural Vocoder with Schr\u00f6dinger Bridge","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414359624","doi":"https://doi.org/10.24963/ijcai.2025/903"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/903","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/903","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056414497","display_name":"Tong Lei","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tong Lei","raw_affiliation_strings":["Nanjing University; Tencent AI Lab"],"affiliations":[{"raw_affiliation_string":"Nanjing University; Tencent AI Lab","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100442940","display_name":"Zhiyu Zhang","orcid":"https://orcid.org/0009-0008-6298-9981"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiyu Zhang","raw_affiliation_strings":["Southeast University"],"affiliations":[{"raw_affiliation_string":"Southeast University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049346285","display_name":"Rilin Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rilin Chen","raw_affiliation_strings":["Tencent AI Lab"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106407019","display_name":"Meng Yu","orcid":"https://orcid.org/0000-0002-0031-9156"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Yu","raw_affiliation_strings":["Tencent AI Lab"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059632088","display_name":"Jing L\u00fc","orcid":"https://orcid.org/0000-0001-9683-3768"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Lu","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070266277","display_name":"Chengshi Zheng","orcid":"https://orcid.org/0000-0001-5656-994X"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengshi Zheng","raw_affiliation_strings":["Institute of Acoustics Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Tencent AI Lab"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053757616","display_name":"Andong Li","orcid":"https://orcid.org/0000-0003-4094-8448"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Andong Li","raw_affiliation_strings":["Institute of Acoustics Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5056414497"],"corresponding_institution_ids":["https://openalex.org/I2250653659"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1395351,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"8122","last_page":"8130"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9416000247001648,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9416000247001648,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.5651000142097473},{"id":"https://openalex.org/keywords/connection","display_name":"Connection (principal bundle)","score":0.5169000029563904},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5113999843597412},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5105000138282776},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.47530001401901245},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4537000060081482},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.43880000710487366},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4106999933719635},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4065999984741211}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7680000066757202},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6122000217437744},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.5651000142097473},{"id":"https://openalex.org/C13355873","wikidata":"https://www.wikidata.org/wiki/Q2920850","display_name":"Connection (principal bundle)","level":2,"score":0.5169000029563904},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5113999843597412},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5105000138282776},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48660001158714294},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.47530001401901245},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4537000060081482},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.43880000710487366},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4106999933719635},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4065999984741211},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.3903999924659729},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.39010000228881836},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.38199999928474426},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.3479999899864197},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C103824480","wikidata":"https://www.wikidata.org/wiki/Q185889","display_name":"Time domain","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.29420000314712524},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.28619998693466187},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C4199805","wikidata":"https://www.wikidata.org/wiki/Q2725903","display_name":"Gaussian noise","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2815000116825104},{"id":"https://openalex.org/C53016008","wikidata":"https://www.wikidata.org/wiki/Q620167","display_name":"Front and back ends","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.27090001106262207}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/903","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/903","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"previous":[1],"diffusion-based":[2,145],"neural":[3,46],"vocoders":[4],"typically":[5],"follow":[6,59],"a":[7,42,78,83,97,126],"noise-to-data":[8],"generation":[9,23,62],"pipe-line,":[10],"the":[11,15,27,35,49,56,60,65,71,90,100,107,112,115],"linear-degradation":[12],"prior":[13],"of":[14],"mel-spectrogram":[16,66],"is":[17,55,93],"often":[18],"neglected,":[19],"resulting":[20],"in":[21],"limited":[22],"quality.":[24],"By":[25],"revisiting":[26],"vocoding":[28],"task":[29],"and":[30,75,102,134,142],"excavating":[31],"its":[32],"connection":[33,98],"with":[34,48,82],"signal":[36],"restoration":[37],"task,":[38],"this":[39],"paper":[40],"proposes":[41],"time-frequency":[43],"(T-F)":[44],"domain-based":[45],"vocoder":[47,146],"Schr\u00f6dinger":[50,91],"Bridge,":[51],"called":[52],"BridgeVoC,":[53],"which":[54],"first":[57],"to":[58,95],"data-to-data":[61],"paradigm.":[63],"Specifically,":[64],"can":[67,118],"be":[68,119],"projected":[69],"into":[70],"target":[72,103,116],"linear-scale":[73],"domain":[74],"regarded":[76],"as":[77],"degraded":[79,101,113],"spectral":[80],"representation":[81],"deficient":[84],"rank":[85],"distribution.":[86],"Based":[87],"on":[88,132],"this,":[89],"Bridge":[92],"leveraged":[94],"establish":[96],"between":[99],"data":[104],"distributions.":[105],"During":[106],"inference":[108,141],"stage,":[109],"starting":[110],"from":[111,125],"representation,":[114],"spectrum":[117],"gradually":[120],"restored":[121],"rather":[122],"than":[123],"generated":[124],"Gaussian":[127],"noise":[128],"process.":[129],"Quantitative":[130],"experiments":[131],"LJSpeech":[133],"LibriTTS":[135],"show":[136],"that":[137],"BridgeVoC":[138],"achieves":[139],"faster":[140],"surpasses":[143],"existing":[144],"baselines,":[147],"while":[148],"also":[149],"matching":[150],"or":[151],"exceeding":[152],"non-diffusion":[153],"state-of-the-art":[154],"methods":[155],"across":[156],"evaluation":[157],"metrics.":[158]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
