{"id":"https://openalex.org/W7108705790","doi":"https://doi.org/10.5281/zenodo.17811376","title":"TOMI: Transforming and Organizing Music Ideas for Multi-Track Compositions with Full-Song Structure","display_name":"TOMI: Transforming and Organizing Music Ideas for Multi-Track Compositions with Full-Song Structure","publication_year":2025,"publication_date":"2025-09-21","ids":{"openalex":"https://openalex.org/W7108705790","doi":"https://doi.org/10.5281/zenodo.17811376"},"language":null,"primary_location":{"id":"doi:10.5281/zenodo.17811376","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811376","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.17811376","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Qi He","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qi He","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Gus Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gus Xia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Ziyu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ziyu Wang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.60850675,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.6808000206947327,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.6808000206947327,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.15520000457763672,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.06620000302791595,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/midi","display_name":"MIDI","score":0.6798999905586243},{"id":"https://openalex.org/keywords/digital-audio","display_name":"Digital audio","score":0.6017000079154968},{"id":"https://openalex.org/keywords/pop-music-automation","display_name":"Pop music automation","score":0.5230000019073486},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.503600001335144},{"id":"https://openalex.org/keywords/electronic-music","display_name":"Electronic music","score":0.4970000088214874},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49390000104904175},{"id":"https://openalex.org/keywords/aside","display_name":"Aside","score":0.4828000068664551},{"id":"https://openalex.org/keywords/musical-composition","display_name":"Musical composition","score":0.4796999990940094}],"concepts":[{"id":"https://openalex.org/C8112396","wikidata":"https://www.wikidata.org/wiki/Q80535","display_name":"MIDI","level":2,"score":0.6798999905586243},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.623199999332428},{"id":"https://openalex.org/C87687168","wikidata":"https://www.wikidata.org/wiki/Q173114","display_name":"Digital audio","level":4,"score":0.6017000079154968},{"id":"https://openalex.org/C73520026","wikidata":"https://www.wikidata.org/wiki/Q7229091","display_name":"Pop music automation","level":4,"score":0.5230000019073486},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.503600001335144},{"id":"https://openalex.org/C532620858","wikidata":"https://www.wikidata.org/wiki/Q9778","display_name":"Electronic music","level":2,"score":0.4970000088214874},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49390000104904175},{"id":"https://openalex.org/C2778120072","wikidata":"https://www.wikidata.org/wiki/Q2858150","display_name":"Aside","level":2,"score":0.4828000068664551},{"id":"https://openalex.org/C109568592","wikidata":"https://www.wikidata.org/wiki/Q207628","display_name":"Musical composition","level":3,"score":0.4796999990940094},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4708000123500824},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.4180999994277954},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.41449999809265137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4075999855995178},{"id":"https://openalex.org/C196017715","wikidata":"https://www.wikidata.org/wiki/Q862597","display_name":"Musical form","level":3,"score":0.3619999885559082},{"id":"https://openalex.org/C40231798","wikidata":"https://www.wikidata.org/wiki/Q1333743","display_name":"Composition (language)","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C114611597","wikidata":"https://www.wikidata.org/wiki/Q373342","display_name":"Popular music","level":2,"score":0.3118000030517578},{"id":"https://openalex.org/C34259666","wikidata":"https://www.wikidata.org/wiki/Q1122550","display_name":"Computer music","level":3,"score":0.31040000915527344},{"id":"https://openalex.org/C196754913","wikidata":"https://www.wikidata.org/wiki/Q27333678","display_name":"Music technology","level":3,"score":0.2930999994277954},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.28049999475479126},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.27559998631477356},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.2728999853134155},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.25760000944137573},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.17811376","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811376","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.17811376","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811376","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"score":0.6194310188293457,"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Hierarchical":[0],"planning":[1],"is":[2,109],"a":[3,46,61,70,80,85],"powerful":[4],"approach":[5,63,141],"to":[6,151],"model":[7,72,108,125],"long":[8],"sequences":[9],"structurally.":[10],"Aside":[11],"from":[12],"considering":[13],"hierarchies":[14],"in":[15,64],"the":[16,123,127],"temporal":[17],"structure":[18],"of":[19,111],"music,":[20],"this":[21,50],"paper":[22],"explores":[23],"an":[24],"even":[25],"more":[26],"important":[27],"aspect:":[28],"concept":[29],"hierarchy,":[30],"which":[31],"involves":[32],"generating":[33,112],"music":[34,66,115,145],"ideas,":[35],"transforming":[36],"them,":[37],"and":[38,44,56,68,103,119],"ultimately":[39],"organizing":[40],"them\u2014across":[41],"musical":[42],"time":[43],"space\u2014into":[45],"complete":[47],"composition.":[48],"To":[49],"end,":[51],"we":[52,78,120],"introduce":[53],"TOMI":[54],"(Transforming":[55],"Organizing":[57],"Music":[58],"Ideas)":[59],"as":[60],"novel":[62],"deep":[65],"generation":[67],"develop":[69],"TOMI-based":[71,124],"via":[73,84],"instruction-tuned":[74],"foundation":[75],"LLM.":[76],"Formally,":[77],"represent":[79],"multi-track":[81,113],"composition":[82],"process":[83],"sparse,":[86],"four-dimensional":[87],"space":[88],"characterized":[89],"by":[90],"clips":[91],"(short":[92],"audio":[93,130],"or":[94],"MIDI":[95],"segments),":[96],"sections":[97],"(temporal":[98],"positions),":[99],"tracks":[100],"(instrument":[101],"layers),":[102],"transformations":[104],"(elaboration":[105],"methods).":[106],"Our":[107],"capable":[110],"electronic":[114,144],"with":[116,126,146],"full-song":[117],"structure,":[118],"further":[121],"integrate":[122],"REAPER":[128],"digital":[129],"workstation,":[131],"enabling":[132],"interactive":[133],"human-AI":[134],"co-creation.":[135],"Experimental":[136],"results":[137],"demonstrate":[138],"that":[139],"our":[140],"produces":[142],"higher-quality":[143],"stronger":[147],"structural":[148],"coherence":[149],"compared":[150],"baselines.":[152]},"counts_by_year":[],"updated_date":"2025-12-05T23:25:22.460635","created_date":"2025-12-05T00:00:00"}
