{"id":"https://openalex.org/W4396877866","doi":"https://doi.org/10.1109/taslp.2024.3399614","title":"Interactive Singing Melody Extraction Based on Active Adaptation","display_name":"Interactive Singing Melody Extraction Based on Active Adaptation","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4396877866","doi":"https://doi.org/10.1109/taslp.2024.3399614"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3399614","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3399614","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008569069","display_name":"Kavya Ranjan Saxena","orcid":"https://orcid.org/0000-0002-6590-2019"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Kavya Ranjan Saxena","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology Kanpur, Kanpur, India"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology Kanpur, Kanpur, India","institution_ids":["https://openalex.org/I94234084"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011121139","display_name":"Vipul Arora","orcid":"https://orcid.org/0000-0002-1207-1258"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vipul Arora","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology Kanpur, Kanpur, India"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology Kanpur, Kanpur, India","institution_ids":["https://openalex.org/I94234084"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5008569069"],"corresponding_institution_ids":["https://openalex.org/I94234084"],"apc_list":null,"apc_paid":null,"fwci":2.2413,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.88226161,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"32","issue":null,"first_page":"2729","last_page":"2738"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9675999879837036,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8960859775543213},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.6325007677078247},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5330625176429749},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5034062266349792},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4912366271018982},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.4100215435028076},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3213768005371094},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1718982458114624},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.08090361952781677},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.07807859778404236}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8960859775543213},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6325007677078247},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5330625176429749},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5034062266349792},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4912366271018982},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.4100215435028076},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3213768005371094},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1718982458114624},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.08090361952781677},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.07807859778404236},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3399614","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3399614","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1526336542","https://openalex.org/W1875231349","https://openalex.org/W1975672287","https://openalex.org/W1978633512","https://openalex.org/W1988844105","https://openalex.org/W2065010255","https://openalex.org/W2108598243","https://openalex.org/W2131953535","https://openalex.org/W2134039779","https://openalex.org/W2161632835","https://openalex.org/W2214409633","https://openalex.org/W2471138382","https://openalex.org/W2932319787","https://openalex.org/W2963118547","https://openalex.org/W2963275094","https://openalex.org/W2963327260","https://openalex.org/W2963535133","https://openalex.org/W2964177567","https://openalex.org/W2970565181","https://openalex.org/W2973077827","https://openalex.org/W3163379884","https://openalex.org/W3176720610","https://openalex.org/W3191088441","https://openalex.org/W3203419049","https://openalex.org/W3205626500","https://openalex.org/W4224920338","https://openalex.org/W6605255326","https://openalex.org/W6639350448","https://openalex.org/W6679227803","https://openalex.org/W6714030504","https://openalex.org/W6717697761","https://openalex.org/W6735374517","https://openalex.org/W6736057607","https://openalex.org/W6743661861","https://openalex.org/W6746836464","https://openalex.org/W6747231328","https://openalex.org/W6756648248","https://openalex.org/W6760299066","https://openalex.org/W6781553146"],"related_works":["https://openalex.org/W2390529913","https://openalex.org/W2142368101","https://openalex.org/W2372249404","https://openalex.org/W2367547137","https://openalex.org/W2354994102","https://openalex.org/W2387733758","https://openalex.org/W2376664795","https://openalex.org/W2366077683","https://openalex.org/W1501596003","https://openalex.org/W2368036937"],"abstract_inverted_index":{"Extraction":[0],"of":[1,9,16,33,56,72,84,229,233],"predominant":[2],"pitch":[3],"from":[4,50,70,99,170],"polyphonic":[5],"audio":[6,35,122,227],"is":[7,37,198],"one":[8,51],"the":[10,14,41,92,100,117,120,142,148,179,186],"fundamental":[11],"tasks":[12],"in":[13,67,78,119,178],"field":[15],"music":[17],"information":[18],"retrieval":[19],"and":[20,200,221],"computational":[21],"musicology.":[22],"To":[23],"accomplish":[24],"this":[25,104],"task":[26],"using":[27,94,127,151],"machine":[28],"learning,":[29],"a":[30,44,57,73,128,157,166,171,218],"large":[31],"amount":[32],"labeled":[34],"data":[36,49,98],"required":[38],"to":[39,144,147,205,211],"train":[40],"model.":[42],"Moreover,":[43],"classical":[45],"model":[46,93,143],"pre-trained":[47],"on":[48,132],"domain":[52,150],"(source),":[53],"e.g.,":[54],"songs":[55,71],"particular":[58],"singer":[59,75],"or":[60,76],"genre,":[61],"may":[62],"not":[63],"perform":[64],"comparatively":[65],"well":[66],"extracting":[68],"melody":[69,111,192,208,238],"different":[74],"genre":[77],"other":[79,190,206],"domains":[80],"(target).":[81],"The":[82,137,195],"performance":[83],"such":[85],"models":[86,210],"can":[87,202],"be":[88,203],"boosted":[89],"by":[90,141],"adapting":[91],"very":[95],"little":[96],"annotated":[97],"target":[101,121,149,180],"domain.":[102,181],"In":[103],"work,":[105],"we":[106,216],"propose":[107],"an":[108],"efficient":[109],"interactive":[110],"adaptation":[112,177],"method.":[113],"Our":[114,153],"method":[115,154,188,197],"selects":[116],"regions":[118],"that":[123,161,185],"require":[124],"human":[125],"annotation":[126],"confidence":[129],"criterion":[130],"based":[131],"normalized":[133],"true":[134],"class":[135,163],"probability.":[136],"annotations":[138],"are":[139,174],"used":[140],"adapt":[145],"itself":[146],"meta-learning.":[152],"also":[155],"provides":[156],"novel":[158],"meta-learning":[159],"approach":[160],"handles":[162],"imbalance,":[164],"i.e.,":[165],"few":[167,172],"representative":[168],"samples":[169],"classes":[173],"available":[175],"for":[176,236],"Experimental":[182],"results":[183],"show":[184],"proposed":[187,196],"outperforms":[189],"adaptive":[191],"extraction":[193,209,239],"baselines.":[194],"model-agnostic":[199],"hence":[201],"applied":[204],"non-adaptive":[207],"boost":[212],"their":[213],"performance.":[214],"Also,":[215],"released":[217],"Hindustani":[219],"Alankaar":[220],"Raga":[222],"(HAR)":[223],"dataset":[224],"containing":[225],"523":[226],"files":[228],"about":[230],"6.86":[231],"hours":[232],"duration":[234],"intended":[235],"singing":[237],"tasks.":[240]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
