{"id":"https://openalex.org/W4372260560","doi":"https://doi.org/10.1109/icassp49357.2023.10094717","title":"Self-Transriber: Few-Shot Lyrics Transcription With Self-Training","display_name":"Self-Transriber: Few-Shot Lyrics Transcription With Self-Training","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260560","doi":"https://doi.org/10.1109/icassp49357.2023.10094717"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10094717","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094717","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101856962","display_name":"Xiaoxue Gao","orcid":"https://orcid.org/0000-0003-1920-5228"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Xiaoxue Gao","raw_affiliation_strings":["National University of Singapore,Department of Electrical and Computer Engineering,Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore,Department of Electrical and Computer Engineering,Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060201191","display_name":"Xianghu Yue","orcid":"https://orcid.org/0000-0003-3527-6034"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xianghu Yue","raw_affiliation_strings":["National University of Singapore,Department of Electrical and Computer Engineering,Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore,Department of Electrical and Computer Engineering,Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["National University of Singapore,Department of Electrical and Computer Engineering,Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore,Department of Electrical and Computer Engineering,Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101856962"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":0.6203,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.63193061,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.9247698187828064},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.7348281145095825},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6709995269775391},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.6309627294540405},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4838677644729614},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3670825958251953},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1133609414100647}],"concepts":[{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.9247698187828064},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.7348281145095825},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6709995269775391},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.6309627294540405},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4838677644729614},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3670825958251953},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1133609414100647},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10094717","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094717","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6899999976158142,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320698","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1164175658","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W2038678505","https://openalex.org/W2059239154","https://openalex.org/W2089428608","https://openalex.org/W2889429804","https://openalex.org/W2902351815","https://openalex.org/W2935701729","https://openalex.org/W2936774411","https://openalex.org/W2962826786","https://openalex.org/W2965116050","https://openalex.org/W2973071600","https://openalex.org/W3010943931","https://openalex.org/W3015522062","https://openalex.org/W3026010363","https://openalex.org/W3027728407","https://openalex.org/W3035160371","https://openalex.org/W3036601975","https://openalex.org/W3041561163","https://openalex.org/W3090751054","https://openalex.org/W3095350795","https://openalex.org/W3160525311","https://openalex.org/W3167533889","https://openalex.org/W3168921237","https://openalex.org/W3204397973","https://openalex.org/W3204846429","https://openalex.org/W4223646224","https://openalex.org/W4235128394","https://openalex.org/W4285483408","https://openalex.org/W4285819380","https://openalex.org/W4285821318","https://openalex.org/W4287028427","https://openalex.org/W4287173589","https://openalex.org/W4306813291","https://openalex.org/W4320537361","https://openalex.org/W4385573222","https://openalex.org/W6623517193","https://openalex.org/W6627665034","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6754635274","https://openalex.org/W6756588447","https://openalex.org/W6780218876","https://openalex.org/W6795952400","https://openalex.org/W6799484589","https://openalex.org/W6810177958","https://openalex.org/W6846349497"],"related_works":["https://openalex.org/W2360952181","https://openalex.org/W4310670065","https://openalex.org/W2389838651","https://openalex.org/W2597614303","https://openalex.org/W437317580","https://openalex.org/W3214861561","https://openalex.org/W2378183644","https://openalex.org/W2287414930","https://openalex.org/W2028097510","https://openalex.org/W2505877856"],"abstract_inverted_index":{"The":[0],"current":[1],"lyrics":[2,41,48,70,128,159],"transcription":[3,49,71],"approaches":[4,150],"heavily":[5],"rely":[6],"on":[7,54,152],"supervised":[8,105,114,117,149],"learning":[9,118,126],"with":[10,59,72,101,147],"labeled":[11,77,95,141,156],"data,":[12],"but":[13],"such":[14],"data":[15,30,34,56,96,142,157],"are":[16],"scarce":[17],"and":[18,31,90,104,115],"manual":[19],"labeling":[20],"of":[21,69,76,83,127,140,155],"singing":[22,86],"is":[23],"expensive.":[24],"How":[25],"to":[26,65,93],"benefit":[27],"from":[28],"unlabeled":[29,55,85],"alleviate":[32],"limited":[33],"problem":[35],"have":[36],"not":[37],"been":[38],"explored":[39],"for":[40,97,124,158],"transcription.":[42,129,160],"We":[43,63],"propose":[44],"the":[45,67,84,94,111,148],"first":[46],"semi-supervised":[47],"paradigm,":[50],"Self-Transcriber,":[51],"by":[52],"leveraging":[53],"using":[57,87,136],"selftraining":[58],"noisy":[60],"student":[61,98],"augmentation.":[62],"attempt":[64],"demonstrate":[66],"possibility":[68],"a":[73],"few":[74],"amount":[75],"data.":[78],"Self-Transcriber":[79],"generates":[80],"pseudo":[81],"labels":[82],"teacher":[88],"model,":[89],"augments":[91],"pseudo-labels":[92],"model":[99],"update":[100],"both":[102],"self-training":[103],"training":[106],"losses.":[107],"This":[108],"work":[109],"closes":[110],"gap":[112],"between":[113],"semi-":[116],"as":[119,121],"well":[120],"opens":[122],"doors":[123],"few-shot":[125],"Our":[130],"experiments":[131],"show":[132],"that":[133],"our":[134],"approach":[135],"only":[137],"12.7":[138],"hours":[139,154],"achieves":[143],"competitive":[144],"performance":[145],"compared":[146],"trained":[151],"149.1":[153]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
