{"id":"https://openalex.org/W4315786026","doi":"https://doi.org/10.1186/s13636-022-00268-1","title":"Stripe-Transformer: deep stripe feature learning for music source separation","display_name":"Stripe-Transformer: deep stripe feature learning for music source separation","publication_year":2023,"publication_date":"2023-01-12","ids":{"openalex":"https://openalex.org/W4315786026","doi":"https://doi.org/10.1186/s13636-022-00268-1"},"language":"en","primary_location":{"id":"doi:10.1186/s13636-022-00268-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-022-00268-1","pdf_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-022-00268-1","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-022-00268-1","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064003387","display_name":"Jiale Qian","orcid":"https://orcid.org/0000-0002-3386-3369"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiale Qian","raw_affiliation_strings":["School of Computer Science and Technology, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101765236","display_name":"Xinlu Liu","orcid":"https://orcid.org/0000-0002-8560-2283"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinlu Liu","raw_affiliation_strings":["School of Computer Science and Technology, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101397583","display_name":"Yi Yu","orcid":"https://orcid.org/0009-0009-7872-7510"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yi Yu","raw_affiliation_strings":["Digital Content and Media Sciences Research Division, National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Digital Content and Media Sciences Research Division, National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100318193","display_name":"Wei Li","orcid":"https://orcid.org/0000-0002-4486-8341"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["School of Computer Science and Technology, Fudan University, Shanghai, China","Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5064003387"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":{"value":1115,"currency":"GBP","value_usd":1367},"apc_paid":{"value":1115,"currency":"GBP","value_usd":1367},"fwci":1.7659,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.84960732,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"2023","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8250042200088501},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.6719409823417664},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6242344379425049},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6089113354682922},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45177051424980164},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.4475475549697876},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4359610974788666},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.4348413944244385},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.42094194889068604},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.41563040018081665},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3584105372428894},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14560502767562866},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.13257023692131042},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.1137283444404602},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10098958015441895}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8250042200088501},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.6719409823417664},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6242344379425049},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6089113354682922},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45177051424980164},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.4475475549697876},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4359610974788666},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.4348413944244385},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.42094194889068604},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.41563040018081665},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3584105372428894},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14560502767562866},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.13257023692131042},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.1137283444404602},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10098958015441895}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s13636-022-00268-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-022-00268-1","pdf_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-022-00268-1","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:6864a344c1574a74b515863874884f02","is_oa":true,"landing_page_url":"https://doaj.org/article/6864a344c1574a74b515863874884f02","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing, Vol 2023, Iss 1, Pp 1-13 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13636-022-00268-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-022-00268-1","pdf_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-022-00268-1","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G201252934","display_name":null,"funder_award_id":"62171138","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6031867557","display_name":null,"funder_award_id":"2019YFC17","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4315786026.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W1505392378","https://openalex.org/W1901129140","https://openalex.org/W2022668263","https://openalex.org/W2064675550","https://openalex.org/W2066776249","https://openalex.org/W2098950531","https://openalex.org/W2127851351","https://openalex.org/W2130770109","https://openalex.org/W2157331557","https://openalex.org/W2159898352","https://openalex.org/W2160767268","https://openalex.org/W2194775991","https://openalex.org/W2296600244","https://openalex.org/W2400805995","https://openalex.org/W2563637302","https://openalex.org/W2587994092","https://openalex.org/W2612138235","https://openalex.org/W2669032454","https://openalex.org/W2752782242","https://openalex.org/W2889404825","https://openalex.org/W2892009249","https://openalex.org/W2903739847","https://openalex.org/W2913030842","https://openalex.org/W2963249619","https://openalex.org/W2963446712","https://openalex.org/W2963750251","https://openalex.org/W2963751183","https://openalex.org/W2963992487","https://openalex.org/W2964070952","https://openalex.org/W2972411915","https://openalex.org/W2972653970","https://openalex.org/W2972964474","https://openalex.org/W2986673441","https://openalex.org/W2997938083","https://openalex.org/W3015753416","https://openalex.org/W3015832727","https://openalex.org/W3034502973","https://openalex.org/W3037149862","https://openalex.org/W3096798607","https://openalex.org/W3113792747","https://openalex.org/W3131500599","https://openalex.org/W3133795225","https://openalex.org/W3138516171","https://openalex.org/W3160241658","https://openalex.org/W3160733670","https://openalex.org/W3183810078","https://openalex.org/W3192255518","https://openalex.org/W4312726009","https://openalex.org/W6603578652","https://openalex.org/W6604770866"],"related_works":["https://openalex.org/W3094316140","https://openalex.org/W3133205200","https://openalex.org/W4289363934","https://openalex.org/W2898145319","https://openalex.org/W2898606530","https://openalex.org/W2098101267","https://openalex.org/W2059119686","https://openalex.org/W4289362680","https://openalex.org/W2403380333","https://openalex.org/W2761596192"],"abstract_inverted_index":{"Abstract":[0],"Music":[1],"source":[2],"separation":[3],"(MSS)":[4],"is":[5,91],"to":[6,93,145],"isolate":[7],"musical":[8],"instrument":[9],"signals":[10],"from":[11],"the":[12,55,98,109,135,139,142],"given":[13],"music":[14,20,26,104,152],"mixture.":[15],"Stripes":[16],"widely":[17],"exist":[18],"in":[19,103,151],"spectrograms,":[21],"which":[22,60],"potentially":[23],"indicate":[24],"high-level":[25],"information.":[27],"For":[28],"example,":[29],"a":[30,34,38,42,47,76,85],"vertical":[31],"stripe":[32,40,51,78],"indicates":[33,41],"drum":[35],"time":[36,99],"and":[37,100,148],"horizontal":[39],"harmonic":[43,149],"component":[44],"such":[45],"as":[46],"singing":[48],"voice.":[49],"These":[50],"features":[52],"actually":[53],"affect":[54],"performance":[56,130],"of":[57,122,141],"MSS":[58,68,83],"systems,":[59],"has":[61],"not":[62],"been":[63],"explicitly":[64],"explored":[65],"by":[66],"previous":[67],"studies.":[69],"In":[70],"this":[71],"paper,":[72],"we":[73],"propose":[74],"stripe-Transformer,":[75],"deep":[77],"feature":[79],"learning":[80],"method":[81],"for":[82],"with":[84,131],"Transformer-based":[86],"architecture.":[87],"Stripe-wise":[88],"self-attention":[89],"mechanism":[90],"designed":[92],"capture":[94],"global":[95],"dependencies":[96],"along":[97],"frequency":[101],"axis":[102],"spectrograms.":[105],"Experimental":[106],"results":[107,137],"on":[108,124],"Musdb18":[110],"dataset":[111],"show":[112,138],"that":[113],"our":[114],"proposed":[115,143],"model":[116,144],"reaches":[117],"an":[118],"average":[119],"source-to-distortion":[120],"(SDR)":[121],"6.71dB":[123],"four":[125],"target":[126],"sources,":[127],"achieving":[128],"state-of-the-art":[129],"fewer":[132],"parameters.":[133],"And":[134],"visualization":[136],"capability":[140],"extract":[146],"beat":[147],"structure":[150],"signals.":[153]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
