{"id":"https://openalex.org/W6929796062","doi":"https://doi.org/10.5281/zenodo.10265295","title":"LyricWhiz: Robust Multilingual Zero-Shot Lyrics Transcription by Whispering to ChatGPT","display_name":"LyricWhiz: Robust Multilingual Zero-Shot Lyrics Transcription by Whispering to ChatGPT","publication_year":2023,"publication_date":"2023-11-04","ids":{"openalex":"https://openalex.org/W6929796062","doi":"https://doi.org/10.5281/zenodo.10265295"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.10265295","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10265295","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.10265295","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Le Zhuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Le Zhuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ruibin Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruibin Yuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jiahao Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiahao Pan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yinghao Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yinghao Ma","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yizhi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yizhi Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ge Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Si Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Si Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Roger B. Dannenberg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roger B. Dannenberg","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jie Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie Fu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chenghua Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chenghua Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Emmanouil Benetos","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emmanouil Benetos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wenhu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenhu Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wei Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Xue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Yike Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yike Guo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31116759,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11354","display_name":"Evolution and Paleontology Studies","score":0.27480000257492065,"subfield":{"id":"https://openalex.org/subfields/1911","display_name":"Paleontology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11354","display_name":"Evolution and Paleontology Studies","score":0.27480000257492065,"subfield":{"id":"https://openalex.org/subfields/1911","display_name":"Paleontology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12417","display_name":"Morphological variations and asymmetry","score":0.17069999873638153,"subfield":{"id":"https://openalex.org/subfields/2608","display_name":"Geometry and Topology"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12234","display_name":"Mollusks and Parasites Studies","score":0.05550000071525574,"subfield":{"id":"https://openalex.org/subfields/1109","display_name":"Insect Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.9419000148773193},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.7648000121116638},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.46480000019073486},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.39590001106262207},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.3815999925136566}],"concepts":[{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.9419000148773193},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.7648000121116638},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7519999742507935},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5400999784469604},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4779999852180481},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.46480000019073486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4339999854564667},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3815999925136566},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3666999936103821},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.10265295","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10265295","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.10265295","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10265295","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4866061806678772}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,143],"introduce":[1],"LyricWhiz,":[2],"a":[3,35,75,124,133,159],"robust,":[4],"multilingual,":[5],"and":[6,27,42,82,100,131,141,149,161],"zero-shot":[7],"automatic":[8],"lyrics":[9,17,104,120,157],"transcription":[10,18,121],"method":[11,148],"achieving":[12],"state-of-the-art":[13],"performance":[14,77],"on":[15,129],"various":[16],"datasets,":[19],"even":[20],"in":[21,98],"challenging":[22,160],"genres":[23],"such":[24],"as":[25,57,67,71],"rock":[26],"metal.":[28],"Our":[29,84],"novel,":[30],"training-free":[31],"approach":[32],"utilizes":[33],"Whisper,":[34],"weakly":[36],"supervised":[37],"robust":[38],"speech":[39],"recognition":[40],"model,":[41],"GPT-4,":[43],"today's":[44],"most":[45],"performant":[46],"chat-based":[47],"large":[48],"language":[49],"model.":[50],"In":[51],"the":[52,58,62,68,114,153],"proposed":[53,147],"method,":[54],"Whisper":[55],"functions":[56],"\"ear\"":[59],"by":[60],"transcribing":[61],"audio,":[63],"while":[64],"GPT-4":[65],"serves":[66],"\"brain,\"":[69],"acting":[70],"an":[72],"annotator":[73],"with":[74,123],"strong":[76],"for":[78,137],"contextualized":[79],"output":[80],"selection":[81],"correction.":[83],"experiments":[85],"show":[86],"that":[87,145],"LyricWhiz":[88,111],"significantly":[89],"reduces":[90],"Word":[91],"Error":[92],"Rate":[93],"compared":[94],"to":[95,112],"existing":[96],"methods":[97],"English":[99],"can":[101],"effectively":[102],"transcribe":[103],"across":[105],"multiple":[106],"languages.":[107],"Furthermore,":[108],"we":[109],"use":[110],"create":[113],"first":[115],"publicly":[116],"available,":[117],"large-scale,":[118],"multilingual":[119,156],"dataset":[122,150],"CC-BY-NC-SA":[125],"copy-right":[126],"license,":[127],"based":[128],"MTG-Jamendo,":[130],"offer":[132],"human-":[134],"annotated":[135],"subset":[136],"noise":[138],"level":[139],"estimation":[140],"evaluation.":[142],"anticipate":[144],"our":[146],"will":[151],"advance":[152],"development":[154],"of":[155],"transcription,":[158],"emerging":[162],"task.":[163]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
