{"id":"https://openalex.org/W2797489470","doi":"https://doi.org/10.1109/icassp.2018.8462247","title":"Transcribing Lyrics from Commercial Song Audio: the First Step Towards Singing Content Processing","display_name":"Transcribing Lyrics from Commercial Song Audio: the First Step Towards Singing Content Processing","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2797489470","doi":"https://doi.org/10.1109/icassp.2018.8462247","mag":"2797489470"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2018.8462247","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462247","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1804.05306","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037471636","display_name":"Che-Ping Tsai","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Che-Ping Tsai","raw_affiliation_strings":["Department of Electrical Engineering, National Taiwan University","Department of Electrical Engineering , National Taiwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Taiwan University","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"Department of Electrical Engineering , National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016343106","display_name":"Yi-Lin Tuan","orcid":"https://orcid.org/0009-0006-0556-515X"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yi-Lin Tuan","raw_affiliation_strings":["Department of Electrical Engineering, National Taiwan University","Department of Electrical Engineering , National Taiwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Taiwan University","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"Department of Electrical Engineering , National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044010123","display_name":"Lin-shan Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Lin-Shan Lee","raw_affiliation_strings":["Department of Electrical Engineering, National Taiwan University","Department of Electrical Engineering , National Taiwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Taiwan University","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"Department of Electrical Engineering , National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03089357,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5749","last_page":"5753"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.9503973722457886},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8733476996421814},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7410149574279785},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.643639326095581},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.5173113346099854},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.5138028264045715},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.4686034917831421},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39180606603622437},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2360578179359436},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.1527378261089325},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.12681609392166138},{"id":"https://openalex.org/keywords/literature","display_name":"Literature","score":0.07053354382514954}],"concepts":[{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.9503973722457886},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8733476996421814},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7410149574279785},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.643639326095581},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.5173113346099854},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.5138028264045715},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.4686034917831421},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39180606603622437},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2360578179359436},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.1527378261089325},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.12681609392166138},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.07053354382514954},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icassp.2018.8462247","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462247","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1804.05306","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1804.05306","pdf_url":"https://arxiv.org/pdf/1804.05306","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2797489470","is_oa":true,"landing_page_url":"https://arxiv.org/abs/1804.05306","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1804.05306","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1804.05306","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1804.05306","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1804.05306","pdf_url":"https://arxiv.org/pdf/1804.05306","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.6800000071525574,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2797489470.pdf","grobid_xml":"https://content.openalex.org/works/W2797489470.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W14941018","https://openalex.org/W112239495","https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W1984076147","https://openalex.org/W2002342963","https://openalex.org/W2002890640","https://openalex.org/W2019211927","https://openalex.org/W2026369565","https://openalex.org/W2065147193","https://openalex.org/W2098044214","https://openalex.org/W2121842316","https://openalex.org/W2148986421","https://openalex.org/W2156230116","https://openalex.org/W2293634267","https://openalex.org/W2397025091","https://openalex.org/W2404881943","https://openalex.org/W2405834540","https://openalex.org/W2407080277","https://openalex.org/W2577008904","https://openalex.org/W2684482591","https://openalex.org/W2729190387","https://openalex.org/W2962711077","https://openalex.org/W2962904995","https://openalex.org/W4235128394","https://openalex.org/W6631362777","https://openalex.org/W6631875079","https://openalex.org/W6681443795","https://openalex.org/W6684812462","https://openalex.org/W6696934422","https://openalex.org/W6712477596","https://openalex.org/W6713762819","https://openalex.org/W6713940333","https://openalex.org/W6732426729","https://openalex.org/W6740627434"],"related_works":["https://openalex.org/W2059239154","https://openalex.org/W3081279708","https://openalex.org/W2552590214","https://openalex.org/W2935701729","https://openalex.org/W2157814890","https://openalex.org/W1945892259","https://openalex.org/W2158880150","https://openalex.org/W2057745663","https://openalex.org/W3207038517","https://openalex.org/W2811952512","https://openalex.org/W2066500288","https://openalex.org/W2892087737","https://openalex.org/W2932175339","https://openalex.org/W3017297317","https://openalex.org/W2347714284","https://openalex.org/W2028766658","https://openalex.org/W3155036195","https://openalex.org/W2053312608","https://openalex.org/W2597614303","https://openalex.org/W2951961943"],"abstract_inverted_index":{"Spoken":[0],"content":[1,13],"processing":[2],"(such":[3],"as":[4,30,36],"retrieval":[5],"and":[6,32],"browsing)":[7],"is":[8,14],"maturing,":[9],"but":[10,126],"the":[11,54,65,122],"singing":[12,94],"still":[15,127],"almost":[16],"completely":[17],"left":[18],"out.":[19],"Songs":[20],"are":[21],"human":[22],"voice":[23],"carrying":[24],"plenty":[25],"of":[26,40,67,88],"semantic":[27],"information":[28],"just":[29],"speech,":[31],"may":[33],"be":[34],"considered":[35],"a":[37],"special":[38,112],"type":[39],"speech":[41],"with":[42,103,106],"highly":[43,60],"flexible":[44,61],"prosody.":[45],"The":[46,96,114],"various":[47],"problems":[48],"in":[49],"song":[50,70],"audio,":[51],"for":[52],"example":[53],"significantly":[55,119],"changing":[56],"phone":[57],"duration":[58],"over":[59],"pitch":[62],"contours,":[63],"make":[64],"recognition":[66],"lyrics":[68],"from":[69,92],"audio":[71],"much":[72],"more":[73],"difficult.":[74],"This":[75],"paper":[76],"reports":[77],"an":[78],"initial":[79],"attempt":[80],"towards":[81],"this":[82],"goal.":[83],"We":[84],"collected":[85],"music-removed":[86],"version":[87],"English":[89],"songs":[90],"directly":[91],"commercial":[93],"content.":[95],"best":[97],"results":[98],"were":[99],"obtained":[100],"by":[101],"TDNN-BLSTM":[102],"data":[104],"augmentation":[105],"3-fold":[107],"speed":[108],"perturbation":[109],"plus":[110],"some":[111],"approaches.":[113],"WER":[115],"achieved":[116],"(73.90%)":[117],"was":[118],"lower":[120],"than":[121],"baseline":[123],"(96.21":[124],"%),":[125],"relatively":[128],"high.":[129]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2022-10-06T00:00:00"}
