{"id":"https://openalex.org/W4226219311","doi":"https://doi.org/10.1109/taslp.2022.3169627","title":"Self-Supervised Contrastive Learning for Singing Voices","display_name":"Self-Supervised Contrastive Learning for Singing Voices","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4226219311","doi":"https://doi.org/10.1109/taslp.2022.3169627"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2022.3169627","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3169627","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9657755/09763018.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/6570655/9657755/09763018.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051687457","display_name":"Hiromu Yakura","orcid":"https://orcid.org/0000-0002-2558-735X"},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"education","lineage":["https://openalex.org/I146399215"]},{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hiromu Yakura","raw_affiliation_strings":["Graduate School of Science and Technology, University of Tsukuba, Ibaraki, Japan","National Institute of Advanced Industrial Science and Technology (AIST), Ibaraki, Japan"],"raw_orcid":"https://orcid.org/0000-0002-2558-735X","affiliations":[{"raw_affiliation_string":"Graduate School of Science and Technology, University of Tsukuba, Ibaraki, Japan","institution_ids":["https://openalex.org/I146399215"]},{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST), Ibaraki, Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102735068","display_name":"Kento Watanabe","orcid":"https://orcid.org/0000-0001-8311-6073"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kento Watanabe","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST), Ibaraki, Japan"],"raw_orcid":"https://orcid.org/0000-0001-8311-6073","affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST), Ibaraki, Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030650456","display_name":"Masataka Goto","orcid":"https://orcid.org/0000-0003-1167-0977"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masataka Goto","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST), Ibaraki, Japan"],"raw_orcid":"https://orcid.org/0000-0003-1167-0977","affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST), Ibaraki, Japan","institution_ids":["https://openalex.org/I73613424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051687457"],"corresponding_institution_ids":["https://openalex.org/I146399215","https://openalex.org/I73613424"],"apc_list":null,"apc_paid":null,"fwci":2.3807,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88567378,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"30","issue":null,"first_page":"1614","last_page":"1623"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.9175165295600891},{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.9173476696014404},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.663918137550354},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6315290331840515},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5740272998809814},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5508579611778259},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.472153902053833},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4533398449420929},{"id":"https://openalex.org/keywords/vibrato","display_name":"Vibrato","score":0.44610774517059326},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40092986822128296},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.18326249718666077},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1295720636844635},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.07751208543777466},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.06954902410507202}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.9175165295600891},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.9173476696014404},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.663918137550354},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6315290331840515},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5740272998809814},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5508579611778259},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.472153902053833},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4533398449420929},{"id":"https://openalex.org/C2781100714","wikidata":"https://www.wikidata.org/wiki/Q377435","display_name":"Vibrato","level":3,"score":0.44610774517059326},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40092986822128296},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.18326249718666077},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1295720636844635},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.07751208543777466},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.06954902410507202},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2022.3169627","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3169627","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9657755/09763018.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2022.3169627","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3169627","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9657755/09763018.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6200000047683716,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G339650242","display_name":null,"funder_award_id":"JP21J20353","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3775593313","display_name":null,"funder_award_id":"JPMJCR20D4","funder_id":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology"},{"id":"https://openalex.org/G6718509927","display_name":null,"funder_award_id":"CREST","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G736760701","display_name":null,"funder_award_id":"JPMJAX200R","funder_id":"https://openalex.org/F4320338246","funder_display_name":"ACT-X"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320338075","display_name":"Core Research for Evolutional Science and Technology","ror":"https://ror.org/00097mb19"},{"id":"https://openalex.org/F4320338246","display_name":"ACT-X","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4226219311.pdf","grobid_xml":"https://content.openalex.org/works/W4226219311.grobid-xml"},"referenced_works_count":71,"referenced_works":["https://openalex.org/W76294984","https://openalex.org/W635779553","https://openalex.org/W1533104612","https://openalex.org/W1547679904","https://openalex.org/W1904711963","https://openalex.org/W1936725236","https://openalex.org/W1963838958","https://openalex.org/W2009955221","https://openalex.org/W2086357144","https://openalex.org/W2107990538","https://openalex.org/W2128980425","https://openalex.org/W2150769028","https://openalex.org/W2152591660","https://openalex.org/W2154591323","https://openalex.org/W2187089797","https://openalex.org/W2428180336","https://openalex.org/W2748501745","https://openalex.org/W2798991696","https://openalex.org/W2890964092","https://openalex.org/W2891575704","https://openalex.org/W2904675554","https://openalex.org/W2906098386","https://openalex.org/W2932319787","https://openalex.org/W2954081429","https://openalex.org/W2963451564","https://openalex.org/W2967638906","https://openalex.org/W2972964474","https://openalex.org/W2977423666","https://openalex.org/W2990396912","https://openalex.org/W2990873191","https://openalex.org/W3016099302","https://openalex.org/W3023371261","https://openalex.org/W3035524453","https://openalex.org/W3037149862","https://openalex.org/W3047425522","https://openalex.org/W3095948607","https://openalex.org/W3101999878","https://openalex.org/W3105159616","https://openalex.org/W3114632476","https://openalex.org/W3138202130","https://openalex.org/W3138296617","https://openalex.org/W3159763464","https://openalex.org/W3160719641","https://openalex.org/W3161606033","https://openalex.org/W3162391496","https://openalex.org/W3201143670","https://openalex.org/W3209059054","https://openalex.org/W4205848394","https://openalex.org/W4238945415","https://openalex.org/W4288280764","https://openalex.org/W4298188388","https://openalex.org/W6607294247","https://openalex.org/W6628615481","https://openalex.org/W6631190155","https://openalex.org/W6632655331","https://openalex.org/W6633499030","https://openalex.org/W6697330893","https://openalex.org/W6697492545","https://openalex.org/W6712043840","https://openalex.org/W6712099778","https://openalex.org/W6713056069","https://openalex.org/W6729122867","https://openalex.org/W6732661675","https://openalex.org/W6745878906","https://openalex.org/W6756560125","https://openalex.org/W6757202746","https://openalex.org/W6764754933","https://openalex.org/W6774314701","https://openalex.org/W6780218876","https://openalex.org/W6784011672","https://openalex.org/W6791537541"],"related_works":["https://openalex.org/W2082155001","https://openalex.org/W2373880408","https://openalex.org/W2532856746","https://openalex.org/W405661683","https://openalex.org/W238390473","https://openalex.org/W1850879794","https://openalex.org/W4288088492","https://openalex.org/W2982290989","https://openalex.org/W2119168266","https://openalex.org/W2978145617"],"abstract_inverted_index":{"This":[0,115],"study":[1],"introduces":[2],"self-supervised":[3,22],"contrastive":[4,23],"learning":[5,24],"to":[6,28,37,66,78,109,128,136,171,189,194,203],"acquire":[7,14,190],"feature":[8,31,134,168,191,225],"representations":[9,16,69,106,135,169,192,226],"of":[10,33,39,55,70,141,218,243],"singing":[11,56,94,113,199,223,244],"voices.":[12,57,245],"To":[13],"robust":[15],"in":[17,81,93],"an":[18],"unsupervised":[19],"manner,":[20],"regular":[21],"trains":[25],"neural":[26],"networks":[27,65,75],"make":[29],"the":[30,53,71,104,130,133,137,147,151,154,164,167,172,177,183,204,209,216],"representation":[32],"a":[34,119,126,145],"sample":[35],"close":[36],"those":[38,92],"its":[40],"computationally":[41],"transformed":[42,72],"versions.":[43,73],"Similarly,":[44],"we":[45,59,63,124],"employ":[46],"two":[47],"transformations\u2014pitch":[48],"shifting":[49,87],"and":[50,91,112,232,235],"time":[51,89,98],"stretching\u2014considering":[52],"nature":[54],"Nevertheless,":[58],"use":[60],"them":[61],"reversely:":[62],"train":[64],"push":[67],"away":[68],"The":[74],"then":[76],"attempt":[77],"discriminate":[79],"changes":[80],"vocal":[82,110,196,220,233],"timbres":[83],"introduced":[84,96],"by":[85,97,157,206],"pitch":[86,101],"without":[88,100,176],"stretching":[90,99],"expressions":[95],"shifting.":[102],"Consequently,":[103],"acquired":[105,175],"become":[107],"attentive":[108,193],"timbre":[111,197],"expression.":[114],"was":[116],"confirmed":[117,236],"through":[118],"singer":[120,139,230],"identification":[121],"task,":[122],"where":[123,166],"trained":[125],"classifier":[127,152,173],"learn":[129],"relationship":[131],"between":[132],"corresponding":[138],"labels":[140],"500":[142],"singers.":[143],"As":[144],"result,":[146],"employed":[148],"transformations":[149,178,210],"helped":[150],"improve":[153],"classification":[155],"accuracy":[156],"9.12%":[158],"(top-1":[159,179],"accuracy:":[160,180],"63.08%)":[161],"compared":[162],"with":[163],"case":[165],"fed":[170],"were":[174],"53.96%).":[181],"Furthermore,":[182],"proposed":[184],"approach":[185],"can":[186],"be":[187],"extended":[188],"either":[195],"or":[198,222],"expression":[200],"but":[201],"not":[202],"other":[205],"changing":[207],"how":[208],"are":[211],"incorporated.":[212],"We":[213],"particularly":[214],"explored":[215],"characteristics":[217],"such":[219],"timbre-":[221],"expression-oriented":[224],"against":[227],"song":[228],"genre,":[229],"gender,":[231],"technique,":[234],"that":[237],"they":[238],"successfully":[239],"capture":[240],"different":[241],"aspects":[242]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
