{"id":"https://openalex.org/W6930617329","doi":"https://doi.org/10.5281/zenodo.14877423","title":"ST-ITO: Controlling Audio Effects for Style Transfer With Inference-Time Optimization","display_name":"ST-ITO: Controlling Audio Effects for Style Transfer With Inference-Time Optimization","publication_year":2024,"publication_date":"2024-11-10","ids":{"openalex":"https://openalex.org/W6930617329","doi":"https://doi.org/10.5281/zenodo.14877423"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.14877423","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.14877423","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Christian J. Steinmetz","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Christian J. Steinmetz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Shubhr Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shubhr Singh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Marco Comunit\u00e0","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marco Comunit\u00e0","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ilias Ibnyahya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ilias Ibnyahya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Shanxin Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shanxin Yuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Emmanouil Benetos","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emmanouil Benetos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Joshua D. Reiss","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joshua D. Reiss","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38864546,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12180","display_name":"Polar Research and Ecology","score":0.6129999756813049,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12180","display_name":"Polar Research and Ecology","score":0.6129999756813049,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10089","display_name":"Avian ecology and behavior","score":0.06040000170469284,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10659","display_name":"Marine animal studies overview","score":0.05689999833703041,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5476999878883362},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5410000085830688},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5210999846458435},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5163000226020813},{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.48559999465942383},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.448199987411499},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4350999891757965},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.4311999976634979},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.42660000920295715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.741100013256073},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5476999878883362},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5410000085830688},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5210999846458435},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5163000226020813},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.48559999465942383},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.47189998626708984},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.448199987411499},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4350999891757965},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.4311999976634979},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.42660000920295715},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4133000075817108},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.3853999972343445},{"id":"https://openalex.org/C81299745","wikidata":"https://www.wikidata.org/wiki/Q334269","display_name":"Transfer function","level":2,"score":0.3747999966144562},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36559998989105225},{"id":"https://openalex.org/C167940747","wikidata":"https://www.wikidata.org/wiki/Q63727227","display_name":"Audio signal flow","level":5,"score":0.33649998903274536},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.29809999465942383},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29440000653266907},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25769999623298645},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2531000077724457},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.14877423","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.14877423","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.535530686378479,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Audio":[0],"production":[1,112,139,150,170],"style":[2,140,151,154,174],"transfer":[3,155,175],"is":[4],"the":[5,53,80,132],"task":[6],"of":[7,33,50,83,94,110,178],"processing":[8],"an":[9,75,84],"input":[10],"to":[11,26,131,147,168],"impart":[12],"stylistic":[13],"elements":[14],"from":[15],"a":[16,23,31,47,107,118,127,144],"reference":[17],"recording.":[18],"Existing":[19],"approaches":[20,38],"often":[21],"train":[22,116],"neural":[24],"network":[25],"estimate":[27],"control":[28,46,93,177],"parameters":[29],"for":[30,137],"set":[32,49],"audio":[34,85,96,111,138,149,162,169,180],"effects.":[35,103,181],"However,":[36],"these":[37],"are":[39],"limited":[40,133],"in":[41],"that":[42,77,160],"they":[43],"can":[44],"only":[45],"fixed":[48],"effects,":[51],"where":[52],"effects":[54],"must":[55],"be":[56],"differentiable":[57],"or":[58],"otherwise":[59],"employ":[60],"specialized":[61],"training":[62],"techniques.":[63],"In":[64],"this":[65],"work,":[66],"we":[67,115,142],"introduce":[68,143],"ST-ITO,":[69],"Style":[70],"Transfer":[71],"with":[72,126],"Inference-Time":[73],"Optimization,":[74],"approach":[76,105],"instead":[78],"searches":[79],"parameter":[81],"space":[82],"effect":[86,97],"chain":[87],"at":[88],"inference.":[89],"This":[90,157],"method":[91],"enables":[92,172],"arbitrary":[95,179],"chains,":[98],"including":[99],"unseen":[100],"and":[101,120,153,171],"non-differentiable":[102],"Our":[104],"employs":[106],"learned":[108],"metric":[109],"style,":[113],"which":[114],"through":[117],"simple":[119],"scalable":[121],"self-supervised":[122],"pretraining":[123],"strategy,":[124],"along":[125],"gradient-free":[128],"optimizer.":[129],"Due":[130],"existing":[134],"evaluation":[135,158],"methods":[136],"transfer,":[141],"multi-part":[145],"benchmark":[146],"evaluate":[148],"metrics":[152],"systems.":[156],"demonstrates":[159],"our":[161],"representation":[163],"better":[164],"captures":[165],"attributes":[166],"related":[167],"expressive":[173],"via":[176]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
