{"id":"https://openalex.org/W4388820595","doi":"https://doi.org/10.1109/apsipaasc58517.2023.10317286","title":"Toward Leveraging Pre-Trained Self-Supervised Frontends for Automatic Singing Voice Understanding Tasks: Three Case Studies","display_name":"Toward Leveraging Pre-Trained Self-Supervised Frontends for Automatic Singing Voice Understanding Tasks: Three Case Studies","publication_year":2023,"publication_date":"2023-10-31","ids":{"openalex":"https://openalex.org/W4388820595","doi":"https://doi.org/10.1109/apsipaasc58517.2023.10317286"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc58517.2023.10317286","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/apsipaasc58517.2023.10317286","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101025232","display_name":"Yuya Yamamoto","orcid":null},"institutions":[{"id":"https://openalex.org/I146399215","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07","country_code":"JP","type":"education","lineage":["https://openalex.org/I146399215"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yuya Yamamoto","raw_affiliation_strings":["University of Tsukuba,Tsukuba,Japan","University of Tsukuba, Tsukuba, Japan"],"affiliations":[{"raw_affiliation_string":"University of Tsukuba,Tsukuba,Japan","institution_ids":["https://openalex.org/I146399215"]},{"raw_affiliation_string":"University of Tsukuba, Tsukuba, Japan","institution_ids":["https://openalex.org/I146399215"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5101025232"],"corresponding_institution_ids":["https://openalex.org/I146399215"],"apc_list":null,"apc_paid":null,"fwci":0.2038,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.47192488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"33","issue":null,"first_page":"1745","last_page":"1752"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8646763563156128},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7948887348175049},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6628111600875854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5219623446464539},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.4494542181491852},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4409610331058502},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.43373388051986694},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.362425833940506}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8646763563156128},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7948887348175049},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6628111600875854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5219623446464539},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.4494542181491852},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4409610331058502},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.43373388051986694},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.362425833940506},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc58517.2023.10317286","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/apsipaasc58517.2023.10317286","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320323900","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W395933519","https://openalex.org/W569478347","https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W2027518030","https://openalex.org/W2406222150","https://openalex.org/W2526050071","https://openalex.org/W2664741028","https://openalex.org/W2906098386","https://openalex.org/W2964199361","https://openalex.org/W2977423666","https://openalex.org/W2978145617","https://openalex.org/W3016099302","https://openalex.org/W3036601975","https://openalex.org/W3090751054","https://openalex.org/W3094550259","https://openalex.org/W3113594615","https://openalex.org/W3119308075","https://openalex.org/W3160569418","https://openalex.org/W3160649916","https://openalex.org/W3198275944","https://openalex.org/W3198694222","https://openalex.org/W3202370288","https://openalex.org/W3203140070","https://openalex.org/W3206189675","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3211224152","https://openalex.org/W4221154606","https://openalex.org/W4221159539","https://openalex.org/W4226219311","https://openalex.org/W4226380987","https://openalex.org/W4283652003","https://openalex.org/W4285106251","https://openalex.org/W4285222490","https://openalex.org/W4287802874","https://openalex.org/W4295357760","https://openalex.org/W4301371414","https://openalex.org/W4310879273","https://openalex.org/W4319862652","https://openalex.org/W4367000246","https://openalex.org/W4372260250","https://openalex.org/W4372349499","https://openalex.org/W4379251869","https://openalex.org/W4388821498","https://openalex.org/W6631190155","https://openalex.org/W6638712229","https://openalex.org/W6697015887","https://openalex.org/W6757202746","https://openalex.org/W6762718338","https://openalex.org/W6768920361","https://openalex.org/W6776218486","https://openalex.org/W6780218876","https://openalex.org/W6782211628","https://openalex.org/W6787141514","https://openalex.org/W6798597329","https://openalex.org/W6799484589","https://openalex.org/W6799613000","https://openalex.org/W6801059870","https://openalex.org/W6803378298","https://openalex.org/W6803547063","https://openalex.org/W6811157148","https://openalex.org/W6842368590","https://openalex.org/W6846349497","https://openalex.org/W6846414411","https://openalex.org/W6846451593","https://openalex.org/W6846805990","https://openalex.org/W6851857987","https://openalex.org/W6853393314","https://openalex.org/W7066299598"],"related_works":["https://openalex.org/W2390529913","https://openalex.org/W2142368101","https://openalex.org/W2372249404","https://openalex.org/W2367547137","https://openalex.org/W2354994102","https://openalex.org/W2387733758","https://openalex.org/W2376664795","https://openalex.org/W2366077683","https://openalex.org/W2028097510","https://openalex.org/W2505877856"],"abstract_inverted_index":{"Automatic":[0],"singing":[1,9,13,119,139,143],"voice":[2,10,120,140],"understanding":[3],"tasks,":[4,92],"such":[5],"as":[6,146],"singer":[7,137],"identification,":[8,138],"transcription,":[11,141],"and":[12,36,82,142,149,165],"technique":[14,144],"classification,":[15],"benefit":[16],"from":[17],"data-driven":[18],"approaches":[19,26],"that":[20,158],"utilize":[21],"deep":[22],"learning":[23,63,98],"techniques.":[24],"These":[25],"work":[27],"well":[28],"even":[29],"under":[30],"the":[31,45,77,90,112,125,184,187],"rich":[32],"diversity":[33],"of":[34,48,73,79,114,127,186],"vocal":[35],"noisy":[37],"samples":[38],"owing":[39],"to":[40,55,95,151,169,181],"their":[41],"representation":[42],"ability.":[43],"However,":[44],"limited":[46,103],"availability":[47],"labeled":[49],"data":[50,75],"remains":[51],"a":[52,178],"significant":[53],"obstacle":[54],"achieving":[56],"satisfactory":[57],"performance.":[58],"In":[59],"recent":[60],"years,":[61],"self-supervised":[62],"models":[64,88,116,131],"(SSL":[65],"models)":[66],"have":[67],"been":[68],"trained":[69],"using":[70],"large":[71],"amounts":[72],"unlabeled":[74],"in":[76,107],"field":[78],"speech":[80],"processing":[81],"music":[83],"classification.":[84],"By":[85],"fine-tuning":[86],"these":[87,153],"for":[89,117,132],"target":[91],"comparable":[93,163],"performance":[94,164],"conventional":[96],"supervised":[97],"can":[99],"be":[100],"achieved":[101],"with":[102],"training":[104],"data.":[105],"Therefore,":[106],"this":[108],"paper,":[109],"we":[110],"investigate":[111],"effectiveness":[113],"SSL":[115,130,160,188],"various":[118],"recognition":[121],"tasks.":[122],"We":[123,175],"report":[124],"results":[126,156],"experiments":[128],"comparing":[129],"three":[133],"different":[134],"tasks":[135],"(i.e.,":[136],"classification)":[145],"initial":[147],"exploration":[148],"aim":[150],"discuss":[152],"findings.":[154],"Experimental":[155],"show":[157],"each":[159,173],"model":[161],"achieves":[162],"sometimes":[166],"outperforms":[167],"compared":[168],"state-of-the-art":[170],"methods":[171],"on":[172],"task.":[174],"also":[176],"conducted":[177],"layer-wise":[179],"analysis":[180],"further":[182],"understand":[183],"behavior":[185],"models.":[189]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
