{"id":"https://openalex.org/W4416249503","doi":"https://doi.org/10.1109/waspaa66052.2025.11230973","title":"Learning Perceptually Relevant Temporal Envelope Morphing","display_name":"Learning Perceptually Relevant Temporal Envelope Morphing","publication_year":2025,"publication_date":"2025-10-12","ids":{"openalex":"https://openalex.org/W4416249503","doi":"https://doi.org/10.1109/waspaa66052.2025.11230973"},"language":null,"primary_location":{"id":"doi:10.1109/waspaa66052.2025.11230973","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112956663","display_name":"Satvik Dixit","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Satvik Dixit","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100679183","display_name":"Sungjoon Park","orcid":"https://orcid.org/0000-0002-6484-1170"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sungjoon Park","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019674079","display_name":"Chris Donahue","orcid":"https://orcid.org/0009-0007-6825-6327"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chris Donahue","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008695422","display_name":"Laurie M. Heller","orcid":"https://orcid.org/0000-0002-4735-5701"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Laurie M. Heller","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5112956663"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":1.1201,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83964581,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.49230000376701355,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.49230000376701355,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.1462000012397766,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.1265999972820282,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/morphing","display_name":"Morphing","score":0.9373000264167786},{"id":"https://openalex.org/keywords/envelope","display_name":"Envelope (radar)","score":0.6462000012397766},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5388000011444092},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.49459999799728394},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.487199991941452},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.46650001406669617},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.4456000030040741},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4043999910354614},{"id":"https://openalex.org/keywords/spectral-envelope","display_name":"Spectral envelope","score":0.39570000767707825}],"concepts":[{"id":"https://openalex.org/C50637493","wikidata":"https://www.wikidata.org/wiki/Q1136781","display_name":"Morphing","level":2,"score":0.9373000264167786},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7555000185966492},{"id":"https://openalex.org/C65155139","wikidata":"https://www.wikidata.org/wiki/Q5380912","display_name":"Envelope (radar)","level":3,"score":0.6462000012397766},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5784000158309937},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.558899998664856},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5388000011444092},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.49459999799728394},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.487199991941452},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.46650001406669617},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.4456000030040741},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4043999910354614},{"id":"https://openalex.org/C54926389","wikidata":"https://www.wikidata.org/wiki/Q7575188","display_name":"Spectral envelope","level":2,"score":0.39570000767707825},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3684999942779541},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.35260000824928284},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.33820000290870667},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3345000147819519},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.28380000591278076},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2793999910354614},{"id":"https://openalex.org/C9940772","wikidata":"https://www.wikidata.org/wiki/Q557399","display_name":"Psychoacoustics","level":3,"score":0.2619999945163727},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.2614000141620636},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2529999911785126},{"id":"https://openalex.org/C3020799230","wikidata":"https://www.wikidata.org/wiki/Q160289","display_name":"Auditory perception","level":3,"score":0.2524000108242035},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa66052.2025.11230973","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320317106","display_name":"Sony","ror":"https://ror.org/04wzv3n59"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1964538581","https://openalex.org/W2013020033","https://openalex.org/W2029654180","https://openalex.org/W2062663442","https://openalex.org/W2088432713","https://openalex.org/W2090135651","https://openalex.org/W2140356304","https://openalex.org/W2150307531","https://openalex.org/W2171590421","https://openalex.org/W2296646900","https://openalex.org/W2396187157","https://openalex.org/W2508228622","https://openalex.org/W2766736793","https://openalex.org/W2945761034","https://openalex.org/W3015591594","https://openalex.org/W3135377667","https://openalex.org/W4240592325","https://openalex.org/W4289538577","https://openalex.org/W4308842396","https://openalex.org/W4361303382","https://openalex.org/W4387969125","https://openalex.org/W4392903391","https://openalex.org/W4396877837","https://openalex.org/W4408345930","https://openalex.org/W4408345986","https://openalex.org/W4408352247","https://openalex.org/W4408354891","https://openalex.org/W4413462963","https://openalex.org/W4415433415"],"related_works":[],"abstract_inverted_index":{"Temporal":[0],"envelope":[1,95,137,160,171],"morphing,":[2,138],"the":[3,8],"process":[4],"of":[5,11,29],"interpolating":[6],"between":[7],"amplitude":[9],"dynamics":[10],"two":[12],"audio":[13,21,56,170],"signals,":[14],"is":[15],"an":[16,153],"emerging":[17],"problem":[18],"in":[19,32,44,52,187],"generative":[20],"systems":[22],"that":[23,135,155,181],"lacks":[24],"sufficient":[25],"perceptual":[26,50,98,133],"grounding.":[27],"Morphing":[28],"temporal":[30,64,71,78,159],"envelopes":[31,65],"a":[33,90,145],"perceptually":[34,82,103,126],"intuitive":[35],"manner":[36],"should":[37],"enable":[38],"new":[39],"methods":[40,186],"for":[41,48,93,168],"sound":[42],"blending":[43],"creative":[45],"media":[46],"and":[47,118,165,176,179,195],"probing":[49],"organization":[51],"psychoacoustics.":[53],"However,":[54],"existing":[55,185],"morphing":[57,96,105],"techniques":[58],"often":[59],"fail":[60],"to":[61,81,124,148,157],"produce":[62],"intermediate":[63,127,190],"when":[66],"input":[67],"sounds":[68],"have":[69],"distinct":[70],"structures;":[72],"many":[73],"morphers":[74],"effectively":[75],"overlay":[76],"both":[77,174],"structures,":[79],"leading":[80],"unnatural":[83],"results.":[84],"In":[85],"this":[86],"paper,":[87],"we":[88,100,130],"introduce":[89],"novel":[91],"workflow":[92],"learning":[94,122],"with":[97],"guidance:":[99],"first":[101],"derive":[102],"grounded":[104],"principles":[106,134],"through":[107],"human":[108],"listening":[109,142],"studies,":[110,143],"then":[111],"synthesize":[112],"large-scale":[113],"datasets":[114,196],"encoding":[115],"these":[116,150],"principles,":[117,151],"finally":[119],"train":[120],"machine":[121],"models":[123],"create":[125],"morphs.":[128,191],"Specifically,":[129],"present:":[131],"(1)":[132],"guide":[136],"derived":[139],"from":[140],"our":[141,182],"(2)":[144],"supervised":[146],"framework":[147],"learn":[149],"(3)":[152],"autoencoder":[154],"learns":[156],"compress":[158],"structures":[161],"into":[162],"latent":[163],"representations,":[164],"(4)":[166],"benchmarks":[167],"evaluating":[169],"morphs,":[172],"using":[173],"synthetic":[175],"naturalistic":[177],"data,":[178],"show":[180],"approach":[183],"outperforms":[184],"producing":[188],"temporally":[189],"All":[192],"code,":[193],"models,":[194],"will":[197],"be":[198],"made":[199],"publicly":[200],"available":[201],"upon":[202],"publication.":[203]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-11-14T00:00:00"}
