{"id":"https://openalex.org/W7148567894","doi":"https://doi.org/10.1109/asru65441.2025.11434600","title":"Sinba: Singing-To-Accompaniment Generation With Pitch Guidance Via Mamba-Based Language Model","display_name":"Sinba: Singing-To-Accompaniment Generation With Pitch Guidance Via Mamba-Based Language Model","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148567894","doi":"https://doi.org/10.1109/asru65441.2025.11434600"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434600","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434600","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132778161","display_name":"Jianwei Cui","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianwei Cui","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132783052","display_name":"Shihao Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shihao Chen","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101903852","display_name":"Yu Gu","orcid":"https://orcid.org/0000-0003-0135-2326"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Gu","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101603527","display_name":"Jing Zhang","orcid":"https://orcid.org/0000-0003-3064-7625"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Zhang","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100430202","display_name":"Liping Chen","orcid":"https://orcid.org/0009-0009-0018-9564"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liping Chen","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100368048","display_name":"Na Li","orcid":"https://orcid.org/0000-0001-9545-3050"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Na Li","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132747965","display_name":"Chengxing Li","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengxing Li","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101736420","display_name":"Shan Yang","orcid":"https://orcid.org/0000-0003-4464-146X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shan Yang","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126699929","display_name":"Lirong Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lirong Dai","raw_affiliation_strings":["University of Science and Technology of China,NERC-SLIP,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,NERC-SLIP,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5132778161"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75423849,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.445499986410141,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.445499986410141,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.05990000069141388,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.04699999839067459,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.37720000743865967},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.27219998836517334},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.24729999899864197},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.24089999496936798}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5881999731063843},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40220001339912415},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3312000036239624},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25440001487731934},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.24729999899864197},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.24089999496936798},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2361000031232834},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.22849999368190765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434600","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434600","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4996253550052643}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1965555277","https://openalex.org/W1970491336","https://openalex.org/W2063016658","https://openalex.org/W2124299914","https://openalex.org/W2747070883","https://openalex.org/W2903502793","https://openalex.org/W2997367363","https://openalex.org/W3081279708","https://openalex.org/W3088329082","https://openalex.org/W3133525064","https://openalex.org/W3158762648","https://openalex.org/W3207340675","https://openalex.org/W3209059054","https://openalex.org/W4296068763","https://openalex.org/W4375928773","https://openalex.org/W4381786045","https://openalex.org/W4385823416","https://openalex.org/W4392904007","https://openalex.org/W4392908898"],"related_works":[],"abstract_inverted_index":{"In":[0,92],"this":[1,93],"paper,":[2,94],"we":[3,95],"propose":[4],"Sinba,":[5],"a":[6,31,43,64],"system":[7],"that":[8,140,148],"can":[9,144],"directly":[10,82],"generate":[11,145],"corresponding":[12],"background":[13],"accompaniment":[14,147],"music":[15],"from":[16,114],"vocal":[17,116,156],"input,":[18,157],"allowing":[19],"users":[20],"to":[21,56,77],"create":[22],"complete":[23],"songs":[24],"using":[25,130],"only":[26],"sung":[27],"vocals.":[28],"Sinba":[29],"adopts":[30],"decoder-only":[32],"backbone":[33,90],"network":[34],"architecture.":[35],"We":[36,109],"utilize":[37],"the":[38,72,89,98,115,123,141,150,155,159],"Mamba":[39,73],"model,":[40],"which":[41],"is":[42,128],"linear-time":[44],"sequence":[45],"modeling":[46,69],"method":[47],"with":[48],"selective":[49],"state":[50],"spaces":[51],"and":[52,101,135,152],"has":[53],"been":[54],"proven":[55],"achieve":[57],"more":[58],"advanced":[59],"performance":[60],"than":[61],"Transformers":[62],"as":[63,88,118],"foundation":[65],"model":[66,127,143],"in":[67],"long-sequence":[68],"tasks.":[70],"However,":[71],"was":[74],"initially":[75],"applied":[76],"audio":[78,85,163],"tasks":[79],"by":[80],"pre-training":[81],"on":[83],"raw":[84],"waveform":[86],"samples":[87,164],"model.":[91,124],"convert":[96],"both":[97],"training":[99],"targets":[100],"inputs":[102],"into":[103],"discretized":[104],"tokens":[105],"for":[106,122],"direct":[107],"training.":[108],"also":[110],"extract":[111],"pitch":[112],"information":[113],"input":[117],"an":[119],"additional":[120],"feature":[121],"The":[125],"proposed":[126,142],"trained":[129],"source-separated":[131],"data":[132],"pairs.":[133],"Subjective":[134],"objective":[136],"experimental":[137],"results":[138],"demonstrate":[139],"high-quality":[146],"matches":[149],"style":[151],"rhythm":[153],"of":[154],"outperforming":[158],"Transformerbased":[160],"baseline.":[161],"Synthesized":[162],"are":[165],"available":[166],"at:":[167],"https://sounddemos.github.io/sinba.":[168]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
