{"id":"https://openalex.org/W4375869388","doi":"https://doi.org/10.1109/icassp49357.2023.10095162","title":"An Attention-Based Approach to Hierarchical Multi-Label Music Instrument Classification","display_name":"An Attention-Based Approach to Hierarchical Multi-Label Music Instrument Classification","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869388","doi":"https://doi.org/10.1109/icassp49357.2023.10095162"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095162","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095162","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040961663","display_name":"Zhi Zhong","orcid":"https://orcid.org/0000-0002-8730-5530"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhi Zhong","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036188375","display_name":"Masato Hirano","orcid":"https://orcid.org/0000-0001-7219-5212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Masato Hirano","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051965288","display_name":"Kazuki Shimada","orcid":"https://orcid.org/0000-0001-5389-2346"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kazuki Shimada","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035933204","display_name":"Kazuya Tateishi","orcid":"https://orcid.org/0000-0002-7655-7628"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kazuya Tateishi","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104117184","display_name":"Shusuke Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shusuke Takahashi","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088754502","display_name":"Yuki Mitsufuji","orcid":"https://orcid.org/0000-0002-6806-6140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuki Mitsufuji","raw_affiliation_strings":["Sony Group Corporation,Tokyo,Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation,Tokyo,Japan","institution_ids":[]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5040961663"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.6432,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.90755461,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8337041735649109},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.7726240158081055},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6042937636375427},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5981767177581787},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.5430658459663391},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5421463251113892},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5244125127792358},{"id":"https://openalex.org/keywords/multi-label-classification","display_name":"Multi-label classification","score":0.4174243211746216},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3536287248134613}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8337041735649109},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.7726240158081055},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6042937636375427},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5981767177581787},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.5430658459663391},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5421463251113892},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5244125127792358},{"id":"https://openalex.org/C2776482837","wikidata":"https://www.wikidata.org/wiki/Q3553958","display_name":"Multi-label classification","level":2,"score":0.4174243211746216},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3536287248134613},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095162","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095162","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7599999904632568,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2101234009","https://openalex.org/W2127870748","https://openalex.org/W2150766729","https://openalex.org/W2296724634","https://openalex.org/W2580221632","https://openalex.org/W2586665457","https://openalex.org/W2606176153","https://openalex.org/W2797876989","https://openalex.org/W2903137797","https://openalex.org/W2936044260","https://openalex.org/W2936774411","https://openalex.org/W2950060770","https://openalex.org/W3014504517","https://openalex.org/W3015867521","https://openalex.org/W3029858316","https://openalex.org/W3094550259","https://openalex.org/W3153755469","https://openalex.org/W3205139495","https://openalex.org/W3205475937","https://openalex.org/W4224918094","https://openalex.org/W4224932154","https://openalex.org/W4225434581","https://openalex.org/W4287077834","https://openalex.org/W4288284459","https://openalex.org/W6631190155","https://openalex.org/W6675354045","https://openalex.org/W6678969435","https://openalex.org/W6697040288","https://openalex.org/W6732646663","https://openalex.org/W6736723571","https://openalex.org/W6756597712","https://openalex.org/W6763945542","https://openalex.org/W6765921464","https://openalex.org/W6775753421","https://openalex.org/W6778572914","https://openalex.org/W6794178131","https://openalex.org/W6798064515"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4224009465","https://openalex.org/W2030530201","https://openalex.org/W4286629047","https://openalex.org/W4306321456","https://openalex.org/W4285260836","https://openalex.org/W3046775127","https://openalex.org/W2081647779","https://openalex.org/W3093454656"],"abstract_inverted_index":{"Although":[0],"music":[1,10,45,58],"is":[2,60],"typically":[3],"multi-label,":[4],"many":[5],"works":[6],"have":[7,133],"studied":[8],"hierarchical":[9,43,63],"tagging":[11],"with":[12],"simplified":[13],"settings":[14],"such":[15],"as":[16],"single-label":[17],"data.":[18],"Moreover,":[19],"there":[20],"lacks":[21],"a":[22,52,68,123],"framework":[23],"to":[24,36,99,159],"describe":[25],"various":[26],"joint":[27,89,139],"training":[28,90],"methods":[29,64,98,132,149],"under":[30],"the":[31,38,75,78,87,92,101,114,118,130,136,143,147],"multi-label":[32,44,93],"setting.":[33],"In":[34,141],"order":[35],"discuss":[37],"above":[39],"topics,":[40],"we":[41,95],"introduce":[42],"instrument":[46],"classification":[47],"task.":[48],"The":[49],"task":[50],"provides":[51],"realistic":[53],"setting":[54],"where":[55,108],"multi-instrument":[56],"real":[57],"data":[59],"assumed.":[61],"Various":[62],"that":[65,129],"jointly":[66],"train":[67],"DNN":[69],"are":[70],"summarized":[71],"and":[72,83,105],"explored":[73],"in":[74,91,122],"context":[76],"of":[77,80],"fusion":[79],"deep":[81],"learning":[82],"conventional":[84],"techniques.":[85],"For":[86],"effective":[88],"setting,":[94],"propose":[96],"two":[97],"model":[100],"connection":[102],"between":[103],"fine-":[104],"coarse-level":[106],"tags,":[107],"one":[109,116],"uses":[110,117],"rule-based":[111],"grouped":[112],"max-pooling,":[113],"other":[115],"attention":[119,155],"mechanism":[120],"obtained":[121],"data-driven":[124],"manner.":[125],"Our":[126],"evaluation":[127],"reveals":[128],"proposed":[131,148],"advantages":[134],"over":[135],"method":[137],"without":[138],"training.":[140],"addition,":[142],"decision":[144],"procedure":[145],"within":[146],"can":[150],"be":[151],"interpreted":[152],"by":[153],"visualizing":[154],"maps":[156],"or":[157],"referring":[158],"fixed":[160],"rules.":[161]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
