{"id":"https://openalex.org/W4408352380","doi":"https://doi.org/10.1109/icassp49660.2025.10890826","title":"Dual Position Attention Time-Frequency Network for Binaural Audio Synthesis","display_name":"Dual Position Attention Time-Frequency Network for Binaural Audio Synthesis","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408352380","doi":"https://doi.org/10.1109/icassp49660.2025.10890826"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890826","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890826","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020365527","display_name":"Changjun He","orcid":"https://orcid.org/0009-0000-4279-5946"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Changjun He","raw_affiliation_strings":["Harbin Institute of Technology,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Harbin,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100652322","display_name":"Weiping Chen","orcid":"https://orcid.org/0000-0001-5658-1914"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiping Chen","raw_affiliation_strings":["Harbin Institute of Technology,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Harbin,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111342963","display_name":"Mingjiang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingjiang Wang","raw_affiliation_strings":["Harbin Institute of Technology (Shenzhen),Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen),Shenzhen,China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020365527"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05113975,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9569000005722046,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.6924222707748413},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6173950433731079},{"id":"https://openalex.org/keywords/binaural-recording","display_name":"Binaural recording","score":0.6104856729507446},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.5292023420333862},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.40937644243240356}],"concepts":[{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.6924222707748413},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6173950433731079},{"id":"https://openalex.org/C201247586","wikidata":"https://www.wikidata.org/wiki/Q5612967","display_name":"Binaural recording","level":2,"score":0.6104856729507446},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.5292023420333862},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40937644243240356},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890826","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890826","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W124275677","https://openalex.org/W1974110359","https://openalex.org/W2065605319","https://openalex.org/W2098882755","https://openalex.org/W2101298784","https://openalex.org/W2140889287","https://openalex.org/W2143270321","https://openalex.org/W2609278443","https://openalex.org/W2616802817","https://openalex.org/W2763307249","https://openalex.org/W2903739847","https://openalex.org/W2970906079","https://openalex.org/W2982624843","https://openalex.org/W3034742263","https://openalex.org/W3136499730","https://openalex.org/W3157866890","https://openalex.org/W3160970338","https://openalex.org/W3163613080","https://openalex.org/W3198586259","https://openalex.org/W3202387331","https://openalex.org/W3213191779","https://openalex.org/W4205278059","https://openalex.org/W4225271852","https://openalex.org/W4231697785","https://openalex.org/W4285044837","https://openalex.org/W4312891280","https://openalex.org/W4372266895","https://openalex.org/W4372270549","https://openalex.org/W4394891739","https://openalex.org/W4396620480","https://openalex.org/W6629844378","https://openalex.org/W6631190155","https://openalex.org/W6726497184","https://openalex.org/W6739901393","https://openalex.org/W6767111847","https://openalex.org/W6792340124","https://openalex.org/W6794073272","https://openalex.org/W6838844135"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2766995619","https://openalex.org/W4224270619","https://openalex.org/W2579722767","https://openalex.org/W2168148781","https://openalex.org/W1991848873","https://openalex.org/W2026165661","https://openalex.org/W2099651033"],"abstract_inverted_index":{"In":[0],"applications":[1],"such":[2],"as":[3],"virtual":[4],"reality":[5],"and":[6,73,114],"augmented":[7],"reality,":[8],"binaural":[9,20,88,129],"audio":[10,21,89],"provides":[11],"listeners":[12],"with":[13,22,131],"a":[14,59,101,123],"more":[15],"immersive":[16],"experience.":[17],"To":[18,52],"synthesize":[19],"enhanced":[23],"spatial":[24],"localization,":[25],"especially":[26],"in":[27,49,94,145],"scenarios":[28],"involving":[29],"moving":[30],"sound":[31,78,111],"sources,":[32],"accurate":[33,128],"phase":[34,116,146],"estimation":[35,117],"is":[36],"crucial.":[37],"However,":[38],"existing":[39],"deep":[40],"learning":[41],"methods":[42],"have":[43],"yet":[44],"to":[45,87,107],"achieve":[46],"satisfactory":[47],"results":[48,133],"this":[50,54,56],"area.":[51],"address":[53],"issue,":[55],"paper":[57],"introduces":[58],"Dual":[60,102],"Position":[61,103],"Attention":[62,104],"Time-Frequency":[63],"Network":[64],"(DPATFNet).":[65],"Specifically,":[66],"our":[67],"approach":[68],"targets":[69],"the":[70,82,95,135],"interaural":[71],"differences":[72],"Doppler":[74],"effects":[75],"induced":[76],"by":[77],"source":[79,112],"movement,":[80],"guiding":[81],"synthesis":[83],"process":[84],"from":[85],"monaural":[86],"through":[90],"strong":[91,124],"positional":[92],"conditions":[93],"time-frequency":[96],"domain.":[97],"The":[98,119],"network":[99],"employs":[100],"Block":[105],"(DPAB)":[106],"effectively":[108],"focus":[109],"on":[110,134],"movement":[113],"improve":[115],"performance.":[118],"proposed":[120],"DPATFNet":[121,141],"demonstrates":[122],"capability":[125],"for":[126],"synthesizing":[127],"audio,":[130],"experimental":[132],"Binaural":[136],"Speech":[137],"dataset":[138],"showing":[139],"that":[140],"achieves":[142],"state-of-the-art":[143],"performance":[144],"metrics":[147],"(Phase-L2:":[148],"0.717,":[149],"IPD-L2:":[150],"1.020,":[151],"Wave-L2:":[152],"0.148,":[153],"Amplitude-L2:":[154],"0.037).":[155]},"counts_by_year":[],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
