{"id":"https://openalex.org/W7133324408","doi":"https://doi.org/10.48550/arxiv.2603.00576","title":"Efficient Long-Sequence Diffusion Modeling for Symbolic Music Generation","display_name":"Efficient Long-Sequence Diffusion Modeling for Symbolic Music Generation","publication_year":2026,"publication_date":"2026-02-28","ids":{"openalex":"https://openalex.org/W7133324408","doi":"https://doi.org/10.48550/arxiv.2603.00576"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00576","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00576","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00576","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074816324","display_name":"Jinhan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Jinhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127961496","display_name":"Xing Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Xing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127907452","display_name":"Houpeng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Houpeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127906431","display_name":"Haoran Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127960407","display_name":"Shenghua Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Shenghua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064212351","display_name":"Jiatao Chen","orcid":"https://orcid.org/0000-0003-4484-7441"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiatao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127969026","display_name":"Tianming Xi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xi, Tianming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127948630","display_name":"Jing Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127899665","display_name":"Jiaojiao Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Jiaojiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5004457967","display_name":"Guangli Xiang","orcid":"https://orcid.org/0000-0002-6885-6265"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiang, Guangli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5074816324"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.45190000534057617,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.45190000534057617,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.39149999618530273,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.07559999823570251,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.580299973487854},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5442000031471252},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.516700029373169},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5001999735832214},{"id":"https://openalex.org/keywords/the-symbolic","display_name":"The Symbolic","score":0.49540001153945923},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.44359999895095825},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4235000014305115},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.40779998898506165}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7027000188827515},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.580299973487854},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5442000031471252},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.516700029373169},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5001999735832214},{"id":"https://openalex.org/C2776095079","wikidata":"https://www.wikidata.org/wiki/Q489538","display_name":"The Symbolic","level":2,"score":0.49540001153945923},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.44359999895095825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4404999911785126},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4235000014305115},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.38960000872612},{"id":"https://openalex.org/C23123167","wikidata":"https://www.wikidata.org/wiki/Q7661193","display_name":"Symbolic trajectory evaluation","level":3,"score":0.38940000534057617},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.357699990272522},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31150001287460327},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3093000054359436},{"id":"https://openalex.org/C109568592","wikidata":"https://www.wikidata.org/wiki/Q207628","display_name":"Musical composition","level":3,"score":0.30059999227523804},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C65620979","wikidata":"https://www.wikidata.org/wiki/Q7661176","display_name":"Symbolic data analysis","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.2727000117301941},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C164218810","wikidata":"https://www.wikidata.org/wiki/Q7933382","display_name":"Violin musical styles","level":3,"score":0.2639000117778778}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00576","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00576","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00576","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00576","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.5821722149848938,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Symbolic":[0],"music":[1,110,119,123,164],"generation":[2,137],"is":[3],"a":[4,60,98,105,158,175],"challenging":[5],"task":[6],"in":[7],"multimedia":[8],"generation,":[9,165],"involving":[10],"long":[11,42,83],"sequences":[12,44],"with":[13,41,54,178],"hierarchical":[14],"temporal":[15],"structures,":[16],"long-range":[17],"dependencies,":[18],"and":[19,38,49,71,91,120,139,143,181],"fine-grained":[20],"local":[21,73,94],"details.":[22],"Though":[23],"recent":[24],"diffusion-based":[25],"models":[26,80],"produce":[27],"high":[28,36],"quality":[29,138],"generations,":[30],"they":[31],"tend":[32],"to":[33,46,65,81,148],"suffer":[34],"from":[35],"training":[37],"inference":[39],"costs":[40],"symbolic":[43,109,163],"due":[45],"iterative":[47],"denoising":[48],"sequence-length-related":[50],"costs.":[51],"To":[52],"deal":[53],"such":[55],"problem,":[56],"we":[57],"put":[58],"forth":[59],"diffusing":[61],"strategy":[62],"named":[63],"SMDIM":[64,75,127,156],"combine":[66],"efficient":[67],"global":[68],"structure":[69],"construction":[70],"light":[72],"refinement.":[74],"uses":[76],"structured":[77],"state":[78],"space":[79],"capture":[82],"range":[84,107],"musical":[85,95,150],"context":[86],"at":[87,184],"near":[88],"linear":[89],"cost,":[90],"selectively":[92],"refines":[93],"details":[96],"via":[97],"hybrid":[99],"refinement":[100],"scheme.":[101],"Experiments":[102],"performed":[103],"on":[104,134],"wide":[106],"of":[108],"datasets":[111],"which":[112],"encompass":[113],"various":[114],"Western":[115],"classical":[116],"music,":[117],"popular":[118],"traditional":[121],"folk":[122],"show":[124,154],"that":[125,155,169],"the":[126,130,136,140,171],"model":[128],"outperforms":[129],"other":[131],"state-of-the-art":[132],"approaches":[133],"both":[135],"computational":[141],"efficiency,":[142],"it":[144],"has":[145],"robust":[146],"generalization":[147],"underexplored":[149],"styles.":[151],"These":[152],"results":[153],"offers":[157],"principled":[159],"solution":[160],"for":[161],"long-sequence":[162],"including":[166],"associated":[167],"attributes":[168],"accompany":[170],"sequences.":[172],"We":[173],"provide":[174],"project":[176],"webpage":[177],"audio":[179],"examples":[180],"supplementary":[182],"materials":[183],"https://3328702107.github.io/smdim-music/.":[185]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-04T00:00:00"}
