{"id":"https://openalex.org/W2949780888","doi":"https://doi.org/10.1145/3323873.3325031","title":"A Hierarchical Attentive Deep Neural Network Model for Semantic Music Annotation Integrating Multiple Music Representations","display_name":"A Hierarchical Attentive Deep Neural Network Model for Semantic Music Annotation Integrating Multiple Music Representations","publication_year":2019,"publication_date":"2019-06-05","ids":{"openalex":"https://openalex.org/W2949780888","doi":"https://doi.org/10.1145/3323873.3325031","mag":"2949780888"},"language":"en","primary_location":{"id":"doi:10.1145/3323873.3325031","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3323873.3325031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 on International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100417694","display_name":"Qianqian Wang","orcid":"https://orcid.org/0000-0001-8217-5952"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qianqian Wang","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006378862","display_name":"Feng Su","orcid":"https://orcid.org/0000-0002-8426-9634"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Su","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065255268","display_name":"Yuyang Wang","orcid":"https://orcid.org/0000-0002-3822-0389"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuyang Wang","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100417694"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":0.4976,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.62249908,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"150","last_page":"158"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8244450092315674},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7049998044967651},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.578870415687561},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5201759338378906},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5152697563171387},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.4866379201412201},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4610172510147095},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.44032543897628784},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38655585050582886},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3346567749977112}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8244450092315674},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7049998044967651},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.578870415687561},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5201759338378906},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5152697563171387},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.4866379201412201},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4610172510147095},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.44032543897628784},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38655585050582886},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3346567749977112},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3323873.3325031","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3323873.3325031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 on International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4300000071525574,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4160614874","display_name":null,"funder_award_id":"BK20171345","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G6312262675","display_name":null,"funder_award_id":"61003113,61321491,61672273","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2083383166","https://openalex.org/W2095705004","https://openalex.org/W2108672713","https://openalex.org/W2116373735","https://openalex.org/W2128889245","https://openalex.org/W2133824856","https://openalex.org/W2170076209","https://openalex.org/W2293772821","https://openalex.org/W2406196141","https://openalex.org/W2525577390","https://openalex.org/W2542729039","https://openalex.org/W2607662938","https://openalex.org/W2766520430","https://openalex.org/W3043861921","https://openalex.org/W3105202226"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2088854863","https://openalex.org/W1976719989","https://openalex.org/W2942893872","https://openalex.org/W2065606036","https://openalex.org/W3179495260","https://openalex.org/W3127543252","https://openalex.org/W3135448569"],"abstract_inverted_index":{"Automatically":[0],"assigning":[1],"a":[2,36,137,170],"group":[3],"of":[4,71,90,98,131,154],"appropriate":[5],"semantic":[6],"tags":[7],"to":[8,18,77,121,141],"one":[9],"music":[10,29,40,53,100,162,175,188,194],"piece":[11],"provides":[12],"an":[13],"effective":[14,79],"way":[15],"for":[16,52,101,174],"people":[17],"efficiently":[19],"utilize":[20],"the":[21,91,99,102,123,128,132,152,155,158,185],"massive":[22],"and":[23,27,49,58,87,134],"ever":[24],"increasing":[25],"on-line":[26],"off-line":[28],"data.":[30],"In":[31],"this":[32],"paper,":[33],"we":[34],"propose":[35],"novel":[37],"content-based":[38],"automatic":[39],"annotation":[41,103],"model":[42,62,112,159,180],"that":[43],"hierarchically":[44],"combines":[45],"attentive":[46,67],"convolutional":[47,68],"networks":[48,51,69,120],"recurrent":[50],"representation":[54,109,147],"learning,":[55],"structure":[56],"modelling":[57],"tag":[59,176],"prediction.":[60,177],"The":[61,111,178],"first":[63],"exploits":[64,114],"two":[65,146],"separate":[66],"composed":[70],"multiple":[72],"gated":[73],"linear":[74],"units":[75],"(GLUs)":[76],"learn":[78],"representations":[80],"from":[81],"both":[82],"1-D":[83],"raw":[84],"waveform":[85],"signals":[86],"2-D":[88],"Mel-spectrogram":[89],"music,":[92,133],"which":[93,149],"better":[94],"captures":[95],"informative":[96],"features":[97],"task":[104],"than":[105],"exploiting":[106],"any":[107],"single":[108],"channel.":[110],"then":[113],"bidirectional":[115],"Long":[116],"Short-Term":[117],"Memory":[118],"(LSTM)":[119],"depict":[122],"time-varying":[124],"structures":[125],"embedded":[126],"in":[127],"description":[129],"sequences":[130],"further":[135],"introduces":[136],"dual-state":[138],"LSTM":[139],"network":[140],"encode":[142],"temporal":[143],"correlations":[144],"between":[145],"channels,":[148],"effectively":[150],"enriches":[151],"descriptions":[153,163],"music.":[156],"Finally,":[157],"adaptively":[160],"aggregates":[161],"generated":[164],"at":[165],"every":[166],"time":[167],"step":[168],"with":[169],"self-attentive":[171],"multi-weighting":[172],"mechanism":[173],"proposed":[179],"achieves":[181],"state-of-the-art":[182],"results":[183],"on":[184,193],"public":[186],"MagnaTagATune":[187],"dataset,":[189],"demonstrating":[190],"its":[191],"effectiveness":[192],"annotation.":[195]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
