{"id":"https://openalex.org/W3015315843","doi":"https://doi.org/10.1109/icassp40776.2020.9054567","title":"Automatic Lyrics Alignment and Transcription in Polyphonic Music: Does Background Music Help?","display_name":"Automatic Lyrics Alignment and Transcription in Polyphonic Music: Does Background Music Help?","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015315843","doi":"https://doi.org/10.1109/icassp40776.2020.9054567","mag":"3015315843"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054567","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054567","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016367573","display_name":"Chitralekha Gupta","orcid":"https://orcid.org/0000-0003-1350-9095"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Chitralekha Gupta","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004568334","display_name":"Emre Y\u0131lmaz","orcid":"https://orcid.org/0000-0001-7466-3358"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Emre Yilmaz","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5016367573"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":4.1136,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.94758539,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"496","last_page":"500"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.9797152280807495},{"id":"https://openalex.org/keywords/polyphony","display_name":"Polyphony","score":0.8427368402481079},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.746872067451477},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7368633151054382},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.7072485089302063},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6447934508323669},{"id":"https://openalex.org/keywords/popular-music","display_name":"Popular music","score":0.44897499680519104},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4372102618217468},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.412311315536499},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3347264528274536},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.2654758095741272},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.23894888162612915},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.15560626983642578},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.14405688643455505},{"id":"https://openalex.org/keywords/visual-arts","display_name":"Visual arts","score":0.09243470430374146},{"id":"https://openalex.org/keywords/literature","display_name":"Literature","score":0.09180682897567749}],"concepts":[{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.9797152280807495},{"id":"https://openalex.org/C128979739","wikidata":"https://www.wikidata.org/wiki/Q179465","display_name":"Polyphony","level":2,"score":0.8427368402481079},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.746872067451477},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7368633151054382},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.7072485089302063},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6447934508323669},{"id":"https://openalex.org/C114611597","wikidata":"https://www.wikidata.org/wiki/Q373342","display_name":"Popular music","level":2,"score":0.44897499680519104},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4372102618217468},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.412311315536499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3347264528274536},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.2654758095741272},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.23894888162612915},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.15560626983642578},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.14405688643455505},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.09243470430374146},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.09180682897567749},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054567","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054567","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5099999904632568,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W112239495","https://openalex.org/W1164175658","https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W1686810756","https://openalex.org/W2057745663","https://openalex.org/W2098796164","https://openalex.org/W2102113734","https://openalex.org/W2104512693","https://openalex.org/W2133824856","https://openalex.org/W2134387846","https://openalex.org/W2144827818","https://openalex.org/W2158880150","https://openalex.org/W2320195920","https://openalex.org/W2327501763","https://openalex.org/W2340080546","https://openalex.org/W2398871418","https://openalex.org/W2407080277","https://openalex.org/W2577008904","https://openalex.org/W2606974598","https://openalex.org/W2736648940","https://openalex.org/W2766219058","https://openalex.org/W2769075302","https://openalex.org/W2774998006","https://openalex.org/W2888867175","https://openalex.org/W2889429804","https://openalex.org/W2903006902","https://openalex.org/W2917340025","https://openalex.org/W2935701729","https://openalex.org/W2962780374","https://openalex.org/W2963452667","https://openalex.org/W2972856300","https://openalex.org/W2973071600","https://openalex.org/W4235128394","https://openalex.org/W6604617924","https://openalex.org/W6627665034","https://openalex.org/W6629717138","https://openalex.org/W6631362777","https://openalex.org/W6637373629","https://openalex.org/W6675365184","https://openalex.org/W6683183305","https://openalex.org/W6712736788","https://openalex.org/W6713762819","https://openalex.org/W6732426729","https://openalex.org/W6741568435","https://openalex.org/W6745913901","https://openalex.org/W6746559740","https://openalex.org/W6751512325"],"related_works":["https://openalex.org/W2378183644","https://openalex.org/W2287414930","https://openalex.org/W2067430219","https://openalex.org/W2964472689","https://openalex.org/W2263813420","https://openalex.org/W4379348330","https://openalex.org/W2994064478","https://openalex.org/W2491013962","https://openalex.org/W2762717821","https://openalex.org/W2144707026"],"abstract_inverted_index":{"Automatic":[0],"lyrics":[1,48,54,105],"alignment":[2,55,106],"and":[3,56,67,75,107],"transcription":[4,57,108],"in":[5,103],"polyphonic":[6,33,112],"music":[7,28,73,89],"are":[8,15],"challenging":[9],"tasks":[10,109],"because":[11],"the":[12,18,45,53,64,70,79,88,104],"singing":[13],"vocals":[14],"corrupted":[16],"by":[17],"background":[19],"music.":[20],"In":[21],"this":[22],"work,":[23],"we":[24,85],"propose":[25],"to":[26,31],"learn":[27],"genre-specific":[29],"characteristics":[30],"train":[32],"acoustic":[34,61,94],"models.":[35],"We":[36,50],"first":[37],"compare":[38],"several":[39],"automatic":[40],"speech":[41],"recognition":[42],"pipelines":[43],"for":[44,63],"application":[46],"of":[47,59,72],"transcription.":[49],"then":[51],"present":[52],"performance":[58],"music-informed":[60],"models":[62],"best-performing":[65],"pipeline,":[66],"systematically":[68],"study":[69],"impact":[71],"genre":[74],"language":[76],"model":[77,87],"on":[78,110],"performance.":[80],"With":[81],"such":[82],"genre-based":[83],"approach,":[84],"explicitly":[86],"without":[90],"removing":[91],"it":[92],"during":[93],"modeling.":[95],"The":[96],"proposed":[97],"approach":[98],"outperforms":[99],"all":[100],"competing":[101],"systems":[102],"well-known":[111],"test":[113],"datasets.":[114]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":2}],"updated_date":"2026-03-29T08:15:47.926485","created_date":"2025-10-10T00:00:00"}
