{"id":"https://openalex.org/W7140162153","doi":"https://doi.org/10.48550/arxiv.2603.21282","title":"Fusing Memory and Attention: A study on LSTM, Transformer and Hybrid Architectures for Symbolic Music Generation","display_name":"Fusing Memory and Attention: A study on LSTM, Transformer and Hybrid Architectures for Symbolic Music Generation","publication_year":2026,"publication_date":"2026-03-22","ids":{"openalex":"https://openalex.org/W7140162153","doi":"https://doi.org/10.48550/arxiv.2603.21282"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.21282","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21282","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.21282","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ghoshal, Soudeep","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ghoshal, Soudeep","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chakraborty, Sandipan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chakraborty, Sandipan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chowdhury, Pradipto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chowdhury, Pradipto","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Buckchash, Himanshu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Buckchash, Himanshu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9369000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9369000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.027000000700354576,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.014600000344216824,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6800000071525574},{"id":"https://openalex.org/keywords/melody","display_name":"Melody","score":0.6251000165939331},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.490200012922287},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4620000123977661},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.42559999227523804},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.32440000772476196},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.30809998512268066}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7709000110626221},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6800000071525574},{"id":"https://openalex.org/C43803900","wikidata":"https://www.wikidata.org/wiki/Q170412","display_name":"Melody","level":3,"score":0.6251000165939331},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.490200012922287},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48249998688697815},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4620000123977661},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.375900000333786},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33880001306533813},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.329800009727478},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32440000772476196},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2897999882698059},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.288100004196167},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.25380000472068787},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.21282","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21282","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.21282","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.21282","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Machine":[0],"learning":[1],"techniques,":[2],"such":[3],"as":[4],"Transformers":[5,29,71,107],"and":[6,28,76,122,140,171,174,189,209,218],"Long":[7],"Short-Term":[8],"Memory":[9],"(LSTM)":[10],"networks,":[11],"play":[12],"a":[13,24,64,129,133],"crucial":[14],"role":[15],"in":[16,79],"Symbolic":[17],"Music":[18],"Generation":[19],"(SMG).":[20],"Existing":[21],"literature":[22],"indicates":[23],"difference":[25],"between":[26],"LSTMs":[27,69],"regarding":[30],"their":[31,44,124,192],"ability":[32],"to":[33,102,114,177,197],"model":[34,108],"local":[35,75,98,170],"melodic":[36],"continuity":[37,173],"versus":[38,70],"maintaining":[39],"global":[40,77,109,172],"structural":[41],"coherence.":[42],"However,":[43],"specific":[45],"properties":[46,78,193],"within":[47],"the":[48,87,154,158,165,178,183,204,216],"context":[49],"of":[50,68,153,186],"SMG":[51],"have":[52],"not":[53],"been":[54],"systematically":[55],"studied.":[56],"This":[57],"paper":[58],"addresses":[59],"this":[60,120,223],"gap":[61],"by":[62],"providing":[63],"fine-grained":[65],"comparative":[66],"analysis":[67,121],"for":[72,222],"SMG,":[73],"examining":[74],"detail":[80],"using":[81],"17":[82],"musical":[83],"quality":[84],"metrics":[85],"on":[86,119,157],"Deutschl":[88,159],"dataset.":[89,160],"We":[90,146,201],"find":[91],"that":[92,164],"LSTM":[93,138],"networks":[94],"excel":[95],"at":[96],"capturing":[97],"patterns":[99],"but":[100,112],"fail":[101],"preserve":[103],"long-range":[104],"dependencies,":[105],"while":[106],"structure":[110],"effectively":[111],"tend":[113],"produce":[115],"irregular":[116],"phrasing.":[117],"Based":[118],"leveraging":[123],"respective":[125],"strengths,":[126],"we":[127],"propose":[128],"Hybrid":[130],"architecture":[131],"combining":[132],"Transformer":[134],"Encoder":[135],"with":[136,206],"an":[137],"Decoder":[139],"evaluate":[141],"it":[142],"against":[143],"both":[144],"baselines.":[145,179],"evaluated":[147],"1,000":[148],"generated":[149],"melodies":[150],"from":[151],"each":[152],"three":[155],"architectures":[156],"The":[161],"results":[162],"show":[163],"hybrid":[166],"method":[167],"achieves":[168],"better":[169],"coherence":[175],"compared":[176],"Our":[180],"work":[181],"highlights":[182],"key":[184],"characteristics":[185],"these":[187],"models":[188],"demonstrates":[190],"how":[191],"can":[194],"be":[195],"leveraged":[196],"design":[198],"superior":[199],"models.":[200],"also":[202],"supported":[203],"experiments":[205],"ablation":[207],"studies":[208],"human":[210],"perceptual":[211],"evaluations,":[212],"which":[213],"statistically":[214],"support":[215],"findings":[217],"provide":[219],"robust":[220],"validation":[221],"work.":[224]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2026-03-25T00:00:00"}
