{"id":"https://openalex.org/W3192610910","doi":"https://doi.org/10.1109/icassp39728.2021.9413774","title":"Singer Identification Using Deep Timbre Feature Learning with KNN-NET","display_name":"Singer Identification Using Deep Timbre Feature Learning with KNN-NET","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3192610910","doi":"https://doi.org/10.1109/icassp39728.2021.9413774","mag":"3192610910"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413774","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028437800","display_name":"Xulong Zhang","orcid":"https://orcid.org/0000-0001-7005-992X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xulong Zhang","raw_affiliation_strings":["Fudan University,School of Computer Science and Technology,Shanghai,China","School of Computer Science and Technology, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University,School of Computer Science and Technology,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064003387","display_name":"Jiale Qian","orcid":"https://orcid.org/0000-0002-3386-3369"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiale Qian","raw_affiliation_strings":["Fudan University,School of Computer Science and Technology,Shanghai,China","School of Computer Science and Technology, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University,School of Computer Science and Technology,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100745222","display_name":"Yi Yu","orcid":"https://orcid.org/0000-0002-0294-6620"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yi Yu","raw_affiliation_strings":["National Institute of Informatics,Digital Content and Media Sciences Research Division,Tokyo,Japan","Digital Content and Media Sciences Research Division, National Institute of Informatics, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Informatics,Digital Content and Media Sciences Research Division,Tokyo,Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"Digital Content and Media Sciences Research Division, National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075553304","display_name":"Yifu Sun","orcid":"https://orcid.org/0000-0003-4924-9387"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifu Sun","raw_affiliation_strings":["Fudan University,School of Computer Science and Technology,Shanghai,China","School of Computer Science and Technology, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University,School of Computer Science and Technology,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100318193","display_name":"Wei Li","orcid":"https://orcid.org/0000-0002-4486-8341"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["Fudan University,School of Computer Science and Technology,Shanghai,China","School of Computer Science and Technology, Fudan University, Shanghai, China","Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University,School of Computer Science and Technology,Shanghai,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5028437800"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":2.8949,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.91690331,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3380","last_page":"3384"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.860482931137085},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.8071732521057129},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7693291306495667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6252704858779907},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6066607236862183},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.553456723690033},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5470699667930603},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5335284471511841},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.490192711353302},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.450653076171875},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.44447317719459534},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4383500814437866},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4124599099159241}],"concepts":[{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.860482931137085},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.8071732521057129},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7693291306495667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6252704858779907},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6066607236862183},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.553456723690033},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5470699667930603},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5335284471511841},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.490192711353302},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.450653076171875},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.44447317719459534},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4383500814437866},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4124599099159241},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413774","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W198601177","https://openalex.org/W1832115024","https://openalex.org/W2165828254","https://openalex.org/W2332271625","https://openalex.org/W2342603028","https://openalex.org/W2402499561","https://openalex.org/W2584329820","https://openalex.org/W2587994092","https://openalex.org/W2735481939","https://openalex.org/W2774707525","https://openalex.org/W2904675554","https://openalex.org/W2922094867","https://openalex.org/W2947201980","https://openalex.org/W2952395326","https://openalex.org/W2965939923","https://openalex.org/W2972964474","https://openalex.org/W2977423666","https://openalex.org/W3016099302","https://openalex.org/W3046610288","https://openalex.org/W3203950195","https://openalex.org/W4287696772","https://openalex.org/W6608019436","https://openalex.org/W6638712229","https://openalex.org/W6684893555","https://openalex.org/W6746914816","https://openalex.org/W6764574124","https://openalex.org/W6781417564","https://openalex.org/W6802004004"],"related_works":["https://openalex.org/W3107204728","https://openalex.org/W4287591324","https://openalex.org/W4226420367","https://openalex.org/W2980176872","https://openalex.org/W2962876041","https://openalex.org/W3090555870","https://openalex.org/W3108503355","https://openalex.org/W4323060069","https://openalex.org/W3095506574","https://openalex.org/W3190449293"],"abstract_inverted_index":{"In":[0],"this":[1,31,56],"paper,":[2],"we":[3,58,102],"study":[4],"the":[5,35,46,60,72,81,95,99,105,132,138,142,161,164,184],"issue":[6],"of":[7,25,48,74,83,155,163],"automatic":[8],"singer":[9,84,115],"identification":[10],"(SID)":[11],"in":[12,34,51],"popular":[13],"music":[14,158],"recordings,":[15],"which":[16,64],"aims":[17],"to":[18,112,123,159,183],"recognize":[19],"who":[20],"sang":[21],"a":[22,38,66,108,178],"given":[23],"piece":[24],"song.":[26],"The":[27,167],"main":[28],"challenge":[29],"for":[30,62,128],"investigation":[32],"lies":[33],"fact":[36],"that":[37,137,173],"singer\u2019s":[39],"singing":[40],"voice":[41,85],"changes":[42],"and":[43,86,151],"intertwines":[44],"with":[45,71],"signal":[47],"background":[49,87],"accompaniment":[50],"time":[52],"domain.":[53],"To":[54],"handle":[55],"challenge,":[57],"propose":[59],"KNN-Net":[61],"SID,":[63],"is":[65,120],"deep":[67,91],"neural":[68,92],"network":[69],"model":[70,176],"goal":[73],"learning":[75],"local":[76],"timbre":[77,126],"feature":[78],"representation":[79],"from":[80],"mixture":[82],"music.":[88],"Unlike":[89],"other":[90],"networks":[93],"using":[94],"softmax":[96],"layer":[97,111],"as":[98,107],"output":[100,113],"layer,":[101],"instead":[103],"utilize":[104],"KNN":[106],"more":[109,168],"interpretable":[110],"target":[114],"labels.":[116],"Moreover,":[117],"attention":[118],"mechanism":[119],"first":[121],"introduced":[122],"highlight":[124],"crucial":[125],"features":[127],"SID.":[129],"Experiments":[130],"on":[131],"existing":[133],"artist20":[134],"dataset":[135],"show":[136],"proposed":[139,165,175],"approach":[140],"outperforms":[141],"state-of-the-art":[143,185],"method":[144],"by":[145],"4%.":[146],"We":[147],"also":[148],"create":[149],"singer32":[150],"singer60":[152],"datasets":[153],"consisting":[154],"Chinese":[156],"pop":[157],"evaluate":[160],"reliability":[162],"method.":[166],"extensive":[169],"experiments":[170],"additionally":[171],"indicate":[172],"our":[174],"achieves":[177],"significant":[179],"performance":[180],"improvement":[181],"compared":[182],"methods.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":11}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
