{"id":"https://openalex.org/W2475687244","doi":"https://doi.org/10.7916/d8n58mhv","title":"Learning-Based Methods for Comparing Sequences, with Applications to Audio-to-MIDI Alignment and Matching","display_name":"Learning-Based Methods for Comparing Sequences, with Applications to Audio-to-MIDI Alignment and Matching","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2475687244","doi":"https://doi.org/10.7916/d8n58mhv","mag":"2475687244"},"language":"en","primary_location":{"id":"pmh:oai:academiccommons.columbia.edu:10.7916/D8N58MHV","is_oa":false,"landing_page_url":"https://doi.org/10.7916/D8N58MHV","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Theses"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.7916/d8n58mhv","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045077843","display_name":"Colin Raffel","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Raffel, Colin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5045077843"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":12.3569,"has_fulltext":false,"cited_by_count":146,"citation_normalized_percentile":{"value":0.99029415,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/midi","display_name":"MIDI","score":0.8179382085800171},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.616100013256073},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5073437094688416},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45648619532585144},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44946035742759705},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1650891900062561}],"concepts":[{"id":"https://openalex.org/C8112396","wikidata":"https://www.wikidata.org/wiki/Q80535","display_name":"MIDI","level":2,"score":0.8179382085800171},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.616100013256073},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5073437094688416},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45648619532585144},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44946035742759705},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1650891900062561},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:academiccommons.columbia.edu:10.7916/D8N58MHV","is_oa":false,"landing_page_url":"https://doi.org/10.7916/D8N58MHV","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Theses"},{"id":"mag:2475687244","is_oa":false,"landing_page_url":"https://academiccommons.columbia.edu/doi/10.7916/D89K4JXX/download","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null},{"id":"doi:10.7916/d8n58mhv","is_oa":true,"landing_page_url":"https://doi.org/10.7916/d8n58mhv","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.7916/d8n58mhv","is_oa":true,"landing_page_url":"https://doi.org/10.7916/d8n58mhv","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W584173323","https://openalex.org/W1980497326","https://openalex.org/W2094728533","https://openalex.org/W2107878631","https://openalex.org/W2147800946","https://openalex.org/W2153579005","https://openalex.org/W2155273149","https://openalex.org/W2221409856","https://openalex.org/W2398243923","https://openalex.org/W2403342846","https://openalex.org/W2404620398"],"related_works":["https://openalex.org/W2963681776","https://openalex.org/W2964121744","https://openalex.org/W1556219185","https://openalex.org/W2406198819","https://openalex.org/W2099471712","https://openalex.org/W2064675550","https://openalex.org/W2963575853","https://openalex.org/W2919624000","https://openalex.org/W2898148140","https://openalex.org/W2963403868","https://openalex.org/W2752134738","https://openalex.org/W1819710477","https://openalex.org/W2746068898","https://openalex.org/W2949382160","https://openalex.org/W2792210438","https://openalex.org/W2559110679","https://openalex.org/W1959608418","https://openalex.org/W2951535099","https://openalex.org/W2753868141","https://openalex.org/W2296724634"],"abstract_inverted_index":{"Sequences":[0],"of":[1,8,15,61,73,76,80,91,145,165,230,250,262,269,278,285,301,309,319,335,351,413,433,453,475,484,490,499,515],"feature":[2,129,252,264,287],"vectors":[3,265,288,372],"are":[4,245],"a":[5,13,17,32,70,162,188,201,226,257,276,290,306,326,344,391,450,497],"natural":[6],"way":[7],"representing":[9],"temporal":[10],"data.":[11,149],"Given":[12],"database":[14,24,308,321,417],"sequences,":[16],"fundamental":[18],"task":[19,300,432],"is":[20,27,121,205,295,380],"to":[21,31,49,69,86,106,155,181,266,281,289,305,322,338,370,429,439],"find":[22,182],"the":[23,28,59,88,128,183,299,320,336,348,430,440,476,482,485,488,491,519],"entry":[25],"which":[26,204,397],"most":[29],"similar":[30],"query.":[33],"In":[34],"this":[35,220,315,387,406],"thesis,":[36],"we":[37,55,312,389,425,510,524],"present":[38],"learning-based":[39,520],"methods":[40,523],"for":[41,112,186,240,259,357,457,501,504],"efficiently":[42,410],"and":[43,95,115,132,143,147,159,167,208,214,248,469,487,518],"accurately":[44],"comparing":[45],"sequences":[46,244,261,268,369],"in":[47,195,200,225,373],"order":[48],"facilitate":[50],"large-scale":[51,227,241,359],"sequence":[52,378,521],"search.":[53,360],"Throughout,":[54],"will":[56],"focus":[57],"on":[58,298,481],"problem":[60],"matching":[62,116,144,196,302,434],"MIDI":[63,93,146,168,303,437,447],"files":[64,94,304,438],"(a":[65],"digital":[66],"score":[67,178],"format)":[68],"large":[71,307],"collection":[72],"audio":[74,96,148,166,310,415,458],"recordings":[75,97],"music.":[77],"The":[78,444,473],"combination":[79],"our":[81,414,516],"proposed":[82],"approaches":[83],"enables":[84,317],"us":[85],"create":[87],"largest":[89],"corpus":[90],"paired":[92,283],"ever":[98],"assembled.":[99],"Dynamic":[100],"time":[101,337],"warping":[102],"(DTW)":[103],"has":[104],"proven":[105],"be":[107,236,323,364],"an":[108,173,374],"extremely":[109,409],"effective":[110],"method":[111,185,258],"both":[113,212,480],"aligning":[114],"sequences.":[117,402],"However,":[118],"its":[119,133,355],"performance":[120,224],"heavily":[122],"affected":[123],"by":[124,366,382],"factors":[125],"such":[126],"as":[127,193],"representation":[130],"used":[131],"adjustable":[134],"parameters.":[135],"We":[136,170,216,254,403,494],"therefore":[137,255,495],"investigate":[138],"automatically":[139],"optimizing":[140],"DTW-based":[141,202],"alignment":[142,190,213],"Our":[150,272],"approach":[151,273,407],"uses":[152,514],"Bayesian":[153],"optimization":[154],"tune":[156],"system":[157,203,221],"design":[158],"parameters":[160],"over":[161,176],"synthetically-created":[163],"dataset":[164,517],"pairs.":[169],"then":[171],"perform":[172],"exhaustive":[174],"search":[175,242],"DTW":[177,234,352],"normalization":[179],"techniques":[180],"optimal":[184],"reporting":[187],"reliable":[189],"confidence":[191],"score,":[192],"required":[194],"tasks.":[197,508],"This":[198,361,463],"results":[199],"conceptually":[206],"simple":[207],"highly":[209],"accurate":[210],"at":[211],"matching.":[215],"also":[217],"verify":[218],"that":[219,314,405],"achieves":[222],"high":[223,419],"qualitative":[228],"evaluation":[229],"real-world":[231],"alignments.":[232],"Unfortunately,":[233],"can":[235,363,398,408,464],"far":[237],"too":[238],"inefficient":[239],"when":[243,341],"very":[246,358],"long":[247,401],"consist":[249],"high-dimensional":[251],"vectors.":[253,271],"propose":[256,390],"mapping":[260,367],"continuously-valued":[263],"downsampled":[267],"binary":[270],"involves":[274],"training":[275],"pair":[277],"convolutional":[279],"networks":[280],"map":[282],"groups":[284],"subsequent":[286],"Hamming":[291],"space":[292,376],"where":[293,377],"similarity":[294,379],"preserved.":[296],"Evaluated":[297],"recordings,":[311],"show":[313,404],"technique":[316],"99.99\\%":[318],"discarded":[324],"with":[325,343,418],"modest":[327],"false":[328],"reject":[329],"rate":[330],"while":[331],"only":[332],"requiring":[333],"0.2\\%":[334],"compute.":[339],"Even":[340],"sped-up":[342],"more":[345],"efficient":[346],"representation,":[347],"quadratic":[349],"complexity":[350],"greatly":[353],"hinders":[354],"feasibility":[356],"cost":[362],"avoided":[365],"entire":[368],"fixed-length":[371],"embedded":[375],"approximated":[381],"Euclidean":[383],"distance.":[384],"To":[385],"achieve":[386],"embedding,":[388],"feed-forward":[392],"attention-based":[393],"neural":[394],"network":[395],"model":[396],"integrate":[399],"arbitrarily":[400],"prune":[411],"90\\%":[412],"recording":[416],"confidence.":[420],"After":[421],"developing":[422],"these":[423],"approaches,":[424],"applied":[426],"them":[427],"together":[428],"practical":[431],"178,561":[435],"unique":[436],"Million":[441],"Song":[442],"Dataset.":[443],"resulting":[445,477],"``Lakh":[446],"Dataset''":[448],"provides":[449],"potential":[451,512],"bounty":[452],"ground":[454],"truth":[455],"information":[456,461,503],"content-based":[459,506],"music":[460],"retrieval.":[462],"include":[465],"transcription,":[466],"meter,":[467],"lyrics,":[468],"high-level":[470],"musicological":[471],"features.":[472],"reliability":[474,500],"annotations":[478],"depends":[479],"quality":[483],"transcription":[486],"accuracy":[489],"score-to-audio":[492],"alignment.":[493],"establish":[496],"baseline":[498],"score-derived":[502],"different":[505],"MIR":[507],"Finally,":[509],"discuss":[511],"future":[513],"comparison":[522],"developed.":[525]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":38},{"year":2020,"cited_by_count":19},{"year":2019,"cited_by_count":18},{"year":2018,"cited_by_count":20},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
