{"id":"https://openalex.org/W2963750251","doi":"https://doi.org/10.1109/waspaa.2017.8169987","title":"Multi-Scale multi-band densenets for audio source separation","display_name":"Multi-Scale multi-band densenets for audio source separation","publication_year":2017,"publication_date":"2017-10-01","ids":{"openalex":"https://openalex.org/W2963750251","doi":"https://doi.org/10.1109/waspaa.2017.8169987","mag":"2963750251"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa.2017.8169987","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa.2017.8169987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101589290","display_name":"Naoya Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Naoya Takahashi","raw_affiliation_strings":["Sony Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088754502","display_name":"Yuki Mitsufuji","orcid":"https://orcid.org/0000-0002-6806-6140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuki Mitsufuji","raw_affiliation_strings":["Sony Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Corporation, Tokyo, Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101589290"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.1856,"has_fulltext":false,"cited_by_count":152,"citation_normalized_percentile":{"value":0.99342376,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"21","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.802161455154419},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.7015036344528198},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.6509018540382385},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.6172823905944824},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5734164714813232},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.47874563932418823},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47598281502723694},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4510773718357086},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4100886881351471},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39191675186157227},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.37057679891586304},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1824178397655487},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.1491318643093109},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.12741896510124207},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11050459742546082},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.07762527465820312},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07466503977775574}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.802161455154419},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.7015036344528198},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.6509018540382385},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.6172823905944824},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5734164714813232},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.47874563932418823},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47598281502723694},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4510773718357086},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4100886881351471},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39191675186157227},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37057679891586304},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1824178397655487},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.1491318643093109},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.12741896510124207},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11050459742546082},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.07762527465820312},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07466503977775574},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa.2017.8169987","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa.2017.8169987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.6899999976158142,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1026270304","https://openalex.org/W1555814299","https://openalex.org/W1594771616","https://openalex.org/W1836465849","https://openalex.org/W1842657353","https://openalex.org/W2062710265","https://openalex.org/W2112796928","https://openalex.org/W2113990625","https://openalex.org/W2127851351","https://openalex.org/W2143027228","https://openalex.org/W2156387975","https://openalex.org/W2179490238","https://openalex.org/W2188183693","https://openalex.org/W2194775991","https://openalex.org/W2198724430","https://openalex.org/W2285559681","https://openalex.org/W2296724634","https://openalex.org/W2335728318","https://openalex.org/W2401104373","https://openalex.org/W2408588119","https://openalex.org/W2552400013","https://openalex.org/W2563534197","https://openalex.org/W2570915410","https://openalex.org/W2669032454","https://openalex.org/W2916985722","https://openalex.org/W2950621961","https://openalex.org/W2963446712","https://openalex.org/W2963543871","https://openalex.org/W3099271704","https://openalex.org/W3118608800","https://openalex.org/W6626481562","https://openalex.org/W6633026971","https://openalex.org/W6638667902","https://openalex.org/W6682889407","https://openalex.org/W6697040288","https://openalex.org/W6703116779","https://openalex.org/W6730869680","https://openalex.org/W6759212570","https://openalex.org/W6787972765"],"related_works":["https://openalex.org/W3125011624","https://openalex.org/W1571003708","https://openalex.org/W3132176577","https://openalex.org/W2114974607","https://openalex.org/W2150502936","https://openalex.org/W4381327731","https://openalex.org/W3112881379","https://openalex.org/W2158216966","https://openalex.org/W2002298560","https://openalex.org/W2019256813"],"abstract_inverted_index":{"This":[0],"paper":[1],"deals":[2],"with":[3,69,133],"the":[4,12,18,24,50,70,120],"problem":[5,72],"of":[6,17,20,73,91,98,116],"audio":[7,21,74],"source":[8,22,75],"separation.":[9],"To":[10,67],"handle":[11],"complex":[13],"and":[14,83,102,127],"ill-posed":[15],"nature":[16],"problems":[19],"separation,":[23,76],"current":[25],"state-of-the-art":[26,104],"approaches":[27],"employ":[28],"deep":[29],"neural":[30],"networks":[31],"to":[32],"obtain":[33],"instrumental":[34],"spectra":[35],"from":[36],"a":[37,44,111],"mixture.":[38],"In":[39],"this":[40],"study,":[41],"we":[42],"propose":[43],"novel":[45],"network":[46,56],"architecture":[47,122],"that":[48],"extends":[49],"recently":[51],"developed":[52],"densely":[53],"connected":[54],"convolutional":[55],"(DenseNet),":[57],"which":[58],"has":[59],"shown":[60],"excellent":[61],"results":[62,105],"on":[63,89,106],"image":[64],"classification":[65],"tasks.":[66],"deal":[68],"specific":[71],"an":[77],"up-sampling":[78],"layer,":[79],"block":[80],"skip":[81],"connection":[82],"band-dedicated":[84],"dense":[85],"blocks":[86],"are":[87],"incorporated":[88],"top":[90],"DenseNet.":[92],"The":[93],"proposed":[94,121],"approach":[95],"takes":[96],"advantage":[97],"long":[99],"contextual":[100],"information":[101],"outperforms":[103],"SiSEC":[107],"2016":[108],"competition":[109],"by":[110],"large":[112],"margin":[113],"in":[114],"terms":[115],"signal-to-distortion":[117],"ratio.":[118],"Moreover,":[119],"requires":[123],"significantly":[124],"fewer":[125],"parameters":[126],"considerably":[128],"less":[129],"training":[130],"time":[131],"compared":[132],"other":[134],"methods.":[135]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":20},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":29},{"year":2019,"cited_by_count":30},{"year":2018,"cited_by_count":17}],"updated_date":"2026-03-04T09:10:02.777135","created_date":"2025-10-10T00:00:00"}
