{"id":"https://openalex.org/W4409156693","doi":"https://doi.org/10.1109/ieeeconf60004.2024.10943074","title":"Time-Frequency Audio Similarity Using Optimal Transport","display_name":"Time-Frequency Audio Similarity Using Optimal Transport","publication_year":2024,"publication_date":"2024-10-27","ids":{"openalex":"https://openalex.org/W4409156693","doi":"https://doi.org/10.1109/ieeeconf60004.2024.10943074"},"language":"en","primary_location":{"id":"doi:10.1109/ieeeconf60004.2024.10943074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ieeeconf60004.2024.10943074","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 58th Asilomar Conference on Signals, Systems, and Computers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/IEEECONF60004.2024.10943074","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Linda Fabiani","orcid":null},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Linda Fabiani","raw_affiliation_strings":["Aalto University,Dept. of Information and Communications Engineering,Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University,Dept. of Information and Communications Engineering,Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079657543","display_name":"Sebastian J. Schlecht","orcid":"https://orcid.org/0000-0001-8858-4642"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sebastian J. Schlecht","raw_affiliation_strings":["Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg,Dept. of Electrical and Computer Engineering,Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg,Dept. of Electrical and Computer Engineering,Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013286528","display_name":"Filip Elvander","orcid":"https://orcid.org/0000-0003-1857-2173"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Filip Elvander","raw_affiliation_strings":["Aalto University,Dept. of Information and Communications Engineering,Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University,Dept. of Information and Communications Engineering,Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31929434,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1414","last_page":"1417"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.65196293592453},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5858156681060791},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5097326636314392},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.4824257791042328},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.37717679142951965},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25446194410324097},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.19207006692886353}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.65196293592453},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5858156681060791},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5097326636314392},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.4824257791042328},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.37717679142951965},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25446194410324097},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.19207006692886353},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ieeeconf60004.2024.10943074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ieeeconf60004.2024.10943074","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 58th Asilomar Conference on Signals, Systems, and Computers","raw_type":"proceedings-article"},{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/135014","is_oa":true,"landing_page_url":"https://doi.org/10.1109/IEEECONF60004.2024.10943074","pdf_url":null,"source":{"id":"https://openalex.org/S4306401663","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"}],"best_oa_location":{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/135014","is_oa":true,"landing_page_url":"https://doi.org/10.1109/IEEECONF60004.2024.10943074","pdf_url":null,"source":{"id":"https://openalex.org/S4306401663","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2130327601","https://openalex.org/W2767414357","https://openalex.org/W2890043615","https://openalex.org/W2963006832","https://openalex.org/W2990440871","https://openalex.org/W2999010383","https://openalex.org/W3015338123","https://openalex.org/W3116275044","https://openalex.org/W3182348197","https://openalex.org/W4206471589","https://openalex.org/W4233762729","https://openalex.org/W4323064860","https://openalex.org/W4372348434","https://openalex.org/W4388691923","https://openalex.org/W4392910551","https://openalex.org/W6682962330","https://openalex.org/W6727623532","https://openalex.org/W6736723571","https://openalex.org/W6753855596","https://openalex.org/W6787485125","https://openalex.org/W6795972600"],"related_works":["https://openalex.org/W2289868279","https://openalex.org/W2296317756","https://openalex.org/W4231351862","https://openalex.org/W4212794605","https://openalex.org/W4315836293","https://openalex.org/W4243888788","https://openalex.org/W2769884427","https://openalex.org/W2088690926","https://openalex.org/W2157165686","https://openalex.org/W2371215329"],"abstract_inverted_index":{"In":[0,24],"audio":[1,10,37],"signal":[2,68,85,126,138],"processing,":[3],"having":[4],"an":[5,16],"effective":[6],"metric":[7,62],"for":[8,32,64,114,136],"comparing":[9],"data":[11],"is":[12],"essential":[13],"to":[14,59,98],"ensure":[15],"accurate":[17],"understanding":[18],"of":[19,45,95,125,144],"sound":[20],"properties":[21],"and":[22,52,106,140],"attributes.":[23],"this":[25],"work,":[26],"we":[27,112],"formulate":[28],"two":[29],"novel":[30],"approaches":[31],"measuring":[33,134],"the":[34,40,61,74,80,84,88,93,96,142,148],"similarity":[35],"between":[36],"signals":[38],"in":[39,83,132,147],"time-frequency":[41],"domain,":[42],"taking":[43],"advantage":[44],"principles":[46],"from":[47],"classical":[48],"optimal":[49,57],"transport":[50,58],"problems":[51],"sliced":[53,89],"Wasserstein":[54,90],"distances.":[55],"Using":[56],"construct":[60],"allows":[63],"a":[65,115],"more":[66,116],"robust":[67],"content":[69],"comparison,":[70],"considering":[71],"not":[72],"only":[73],"signals'":[75],"individual":[76],"elements":[77],"but":[78],"also":[79],"global":[81],"distribution":[82],"space.":[86],"Additionally,":[87],"methods":[91],"expand":[92],"use":[94],"distances":[97,135],"high":[99],"dimensional":[100],"problems.":[101],"By":[102],"integrating":[103],"both":[104],"time":[105],"frequency":[107],"aspects":[108],"into":[109],"our":[110],"metrics,":[111],"aim":[113],"comprehensive":[117],"comparison":[118],"that":[119],"can":[120],"better":[121],"handle":[122],"various":[123],"types":[124],"distortions.":[127],"Results":[128],"show":[129],"promising":[130],"behavior":[131],"accurately":[133],"increasing":[137],"differences":[139],"avoiding":[141],"presence":[143],"local":[145],"minima":[146],"loss":[149],"curves.":[150]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
