{"id":"https://openalex.org/W3081753361","doi":"https://doi.org/10.1109/spcom50965.2020.9179583","title":"Effectiveness of Transfer Learning on Singing Voice Conversion in the Presence of Background Music","display_name":"Effectiveness of Transfer Learning on Singing Voice Conversion in the Presence of Background Music","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3081753361","doi":"https://doi.org/10.1109/spcom50965.2020.9179583","mag":"3081753361"},"language":"en","primary_location":{"id":"doi:10.1109/spcom50965.2020.9179583","is_oa":false,"landing_page_url":"https://doi.org/10.1109/spcom50965.2020.9179583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Signal Processing and Communications (SPCOM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048618668","display_name":"Divyesh G. Rajpura","orcid":null},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Divyesh G. Rajpura","raw_affiliation_strings":["Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109455358","display_name":"Jui Shah","orcid":null},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jui Shah","raw_affiliation_strings":["Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018555559","display_name":"Maitreya Patel","orcid":null},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Maitreya Patel","raw_affiliation_strings":["Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037317140","display_name":"Harshit Malaviya","orcid":null},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Harshit Malaviya","raw_affiliation_strings":["Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050623597","display_name":"Kirtana Sunil Phatnani","orcid":"https://orcid.org/0000-0001-9988-1167"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kirtana Phatnani","raw_affiliation_strings":["Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India","institution_ids":["https://openalex.org/I98389781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043002276","display_name":"Hemant A. Patil","orcid":"https://orcid.org/0000-0002-4068-2005"},"institutions":[{"id":"https://openalex.org/I98389781","display_name":"Dhirubhai Ambani Institute of Information and Communication Technology","ror":"https://ror.org/02d5b7g69","country_code":"IN","type":"education","lineage":["https://openalex.org/I98389781"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Hemant A. Patil","raw_affiliation_strings":["Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India"],"affiliations":[{"raw_affiliation_string":"Speech Research Lab, Dhirubhai Ambani Institute of Information and Communication Technology (DA-IICT), Gandhinagar, Gujarat, India","institution_ids":["https://openalex.org/I98389781"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5048618668"],"corresponding_institution_ids":["https://openalex.org/I98389781"],"apc_list":null,"apc_paid":null,"fwci":0.7577,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70980908,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"10","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8367590308189392},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.6988900899887085},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6680477261543274},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.6064474582672119},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.6011011004447937},{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.5990442633628845},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.5086241960525513},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.49563539028167725},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4827362895011902},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.48069435358047485},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.47010916471481323},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.43390604853630066},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.42779096961021423},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.4117392599582672},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3162139654159546}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8367590308189392},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.6988900899887085},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6680477261543274},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.6064474582672119},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.6011011004447937},{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.5990442633628845},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.5086241960525513},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.49563539028167725},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4827362895011902},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.48069435358047485},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.47010916471481323},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.43390604853630066},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.42779096961021423},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.4117392599582672},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3162139654159546},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/spcom50965.2020.9179583","is_oa":false,"landing_page_url":"https://doi.org/10.1109/spcom50965.2020.9179583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Signal Processing and Communications (SPCOM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6800000071525574,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1482149378","https://openalex.org/W2009955221","https://openalex.org/W2030149476","https://openalex.org/W2064675550","https://openalex.org/W2067709094","https://openalex.org/W2077865492","https://openalex.org/W2099471712","https://openalex.org/W2127851351","https://openalex.org/W2221409856","https://openalex.org/W2295406225","https://openalex.org/W2460742184","https://openalex.org/W2558649592","https://openalex.org/W2735663686","https://openalex.org/W2774848319","https://openalex.org/W2888796252","https://openalex.org/W2889404825","https://openalex.org/W2921857201","https://openalex.org/W2937579788","https://openalex.org/W2962715207","https://openalex.org/W2962793481","https://openalex.org/W2963539064","https://openalex.org/W2963830550","https://openalex.org/W2972667718","https://openalex.org/W2972812066","https://openalex.org/W2996414377","https://openalex.org/W3012437242","https://openalex.org/W3012498027","https://openalex.org/W3101689408","https://openalex.org/W3124972797","https://openalex.org/W4298310324","https://openalex.org/W4320013936","https://openalex.org/W4396738389","https://openalex.org/W6746801104","https://openalex.org/W6866357234"],"related_works":["https://openalex.org/W2029561777","https://openalex.org/W1554502231","https://openalex.org/W172797710","https://openalex.org/W2360952181","https://openalex.org/W3165080709","https://openalex.org/W2945105049","https://openalex.org/W2626699140","https://openalex.org/W4387098302","https://openalex.org/W2948317131","https://openalex.org/W4225326921"],"abstract_inverted_index":{"Singing":[0],"voice":[1,29],"conversion":[2,30],"(SVC)":[3],"is":[4,88,99,144],"a":[5,171,203],"task":[6],"of":[7,11,34,52,95,101,112,163,208],"converting":[8],"the":[9,12,17,32,35,50,67,73,84,93,102,109,115,159,184,212,220,224,229],"perception":[10],"source":[13,110,142],"speaker's":[14],"identity":[15],"to":[16,157,187,223],"target":[18],"speaker":[19],"without":[20],"changing":[21],"lyrics":[22],"and":[23,43,130,166,180,196,199],"rhythm.":[24],"Recent":[25],"approaches":[26],"in":[27,49,66,92,123,228],"traditional":[28,70],"involve":[31],"use":[33,170,183],"generative":[36],"models,":[37],"such":[38],"as":[39,106,176],"Variational":[40],"Autoencoders":[41],"(VAE),":[42],"Generative":[44],"Adversarial":[45],"Networks":[46],"(GANs).":[47],"However,":[48],"case":[51],"SVC,":[53,139],"GANs":[54],"are":[55],"not":[56,89],"explored":[57],"much.":[58],"The":[59,76],"only":[60],"system":[61],"that":[62,201],"has":[63],"been":[64],"proposed":[65],"literature":[68],"uses":[69],"GAN":[71,134],"on":[72,216],"parallel":[74,77],"data.":[75],"data":[78],"collection":[79],"for":[80,137,210],"real":[81],"scenarios":[82],"(with":[83],"same":[85],"background":[86,96],"music)":[87],"feasible.":[90],"Moreover,":[91],"presence":[94],"music,":[97],"SVC":[98],"one":[100],"most":[103],"challenging":[104,173],"tasks":[105],"it":[107],"involves":[108],"separation":[111,143],"vocals":[113],"from":[114],"inputs,":[116],"which":[117],"will":[118],"have":[119],"some":[120],"noise.":[121],"Therefore,":[122],"this":[124],"paper,":[125],"we":[126,169,181],"propose":[127],"transfer":[128,164],"learning,":[129],"fine-tuning-based":[131],"Cycle":[132],"consistent":[133],"(CycleGAN)":[135],"model":[136,214],"non-parallel":[138],"where":[140],"music":[141],"done":[145],"using":[146],"Deep":[147],"Attractor":[148],"Network":[149],"(DANet).":[150],"We":[151,190],"designed":[152],"seven":[153],"different":[154],"possible":[155,161],"systems":[156,226],"identify":[158],"best":[160,221],"combination":[162],"learning":[165],"fine-tuning.":[167],"Here,":[168],"more":[172],"database,":[174],"MUSDB18,":[175],"our":[177],"primary":[178],"dataset,":[179],"also":[182],"NUS-48E":[185,217],"database":[186],"pre-train":[188],"CycleGAN.":[189],"perform":[191],"extensive":[192],"analysis":[193],"via":[194],"objective":[195],"subjective":[197],"measures":[198],"report":[200],"with":[202],"4.14":[204],"MOS":[205],"score":[206],"out":[207],"5":[209],"naturalness,":[211],"CycleGAN":[213],"pre-trained":[215],"corpus":[218],"performs":[219],"compared":[222],"other":[225],"described":[227],"paper.":[230]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
