{"id":"https://openalex.org/W7156652767","doi":"https://doi.org/10.48550/arxiv.2604.24386","title":"An event-based sequence modeling approach to recognizing non-triad chords with oversegmentation minimization","display_name":"An event-based sequence modeling approach to recognizing non-triad chords with oversegmentation minimization","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7156652767","doi":"https://doi.org/10.48550/arxiv.2604.24386"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24386","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24386","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24386","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013331634","display_name":"Leekyung Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Leekyung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134764519","display_name":"Jonghun Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Jonghun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.0006000000284984708,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chord","display_name":"Chord (peer-to-peer)","score":0.939300000667572},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.489300012588501},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.48750001192092896},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41819998621940613},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.400299996137619},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.329800009727478},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.3271999955177307}],"concepts":[{"id":"https://openalex.org/C194147245","wikidata":"https://www.wikidata.org/wiki/Q1076368","display_name":"Chord (peer-to-peer)","level":2,"score":0.939300000667572},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6313999891281128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5555999875068665},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.489300012588501},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.48750001192092896},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41819998621940613},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41040000319480896},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.400299996137619},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3271999955177307},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.32260000705718994},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.3165000081062317},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C189645446","wikidata":"https://www.wikidata.org/wiki/Q350865","display_name":"Mirroring","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.29510000348091125},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2791999876499176},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2773999869823456}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24386","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24386","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24386","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24386","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automatic":[0],"chord":[1,6,52,69,93,105,116,134],"recognition":[2,106,135],"(ACR)":[3],"extracts":[4],"time-aligned":[5,92],"labels":[7],"from":[8],"music":[9],"audio":[10],"recordings.":[11],"Despite":[12],"recent":[13],"advances,":[14],"ACR":[15,44],"still":[16],"struggles":[17],"with":[18,109],"oversegmentation,":[19],"data":[20],"scarcity,":[21],"and":[22,83,107,114,129],"imbalance,":[23],"especially":[24],"in":[25,35,103],"recognizing":[26],"complex":[27,113],"chords":[28],"such":[29],"as":[30,45],"non-triads,":[31],"which":[32],"are":[33,54],"unpopular":[34],"existing":[36],"datasets.":[37],"To":[38],"address":[39],"these":[40],"challenges,":[41],"we":[42],"reformulate":[43],"a":[46],"segment-level":[47,124],"sequence-to-sequence":[48],"prediction":[49],"task,":[50],"where":[51],"sequences":[53],"predicted":[55],"auto-regressively":[56],"rather":[57],"than":[58],"frame":[59],"by":[60,67],"frame.":[61],"This":[62],"design":[63],"mitigates":[64],"excessive":[65],"segmentation":[66],"detecting":[68],"changes":[70],"only":[71],"at":[72],"segment":[73],"boundaries.":[74],"We":[75],"further":[76],"introduce":[77],"two":[78],"types":[79],"of":[80,123],"token":[81],"representations":[82],"an":[84],"encoder":[85],"pre-training":[86],"method,":[87],"both":[88,104],"specifically":[89],"designed":[90],"for":[91,112,132],"modeling.":[94],"Experimental":[95],"results":[96],"show":[97],"that":[98],"our":[99],"model":[100],"improves":[101],"performance":[102],"segmentation,":[108],"notable":[110],"gains":[111],"infrequent":[115],"types.":[117],"These":[118],"findings":[119],"demonstrate":[120],"the":[121],"effectiveness":[122],"sequence":[125],"modeling,":[126],"structured":[127],"tokenization,":[128],"representation":[130],"learning":[131],"advancing":[133],"systems.":[136]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-29T00:00:00"}
