{"id":"https://openalex.org/W4372346311","doi":"https://doi.org/10.1109/icassp49357.2023.10096517","title":"Visual Onoma-to-Wave: Environmental Sound Synthesis from Visual Onomatopoeias and Sound-Source Images","display_name":"Visual Onoma-to-Wave: Environmental Sound Synthesis from Visual Onomatopoeias and Sound-Source Images","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372346311","doi":"https://doi.org/10.1109/icassp49357.2023.10096517"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096517","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096517","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031777827","display_name":"Hien Ohnaka","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]},{"id":"https://openalex.org/I4210142609","display_name":"National Institute of Technology, Tokuyama College","ror":"https://ror.org/046jx2h22","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210120810","https://openalex.org/I4210142609"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hien Ohnaka","raw_affiliation_strings":["National Institute of Technology,Tokuyama College,Japan","Tokuyama College, National Institute of Technology, Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Technology,Tokuyama College,Japan","institution_ids":["https://openalex.org/I4210142609"]},{"raw_affiliation_string":"Tokuyama College, National Institute of Technology, Japan","institution_ids":["https://openalex.org/I4210142609"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013050263","display_name":"Shinnosuke Takamichi","orcid":"https://orcid.org/0000-0003-0520-7847"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinnosuke Takamichi","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009991655","display_name":"Keisuke Imoto","orcid":"https://orcid.org/0000-0002-0703-8293"},"institutions":[{"id":"https://openalex.org/I133984924","display_name":"Doshisha University","ror":"https://ror.org/01fxdkm29","country_code":"JP","type":"education","lineage":["https://openalex.org/I133984924"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keisuke Imoto","raw_affiliation_strings":["Doshisha University,Japan","Doshisha University, Japan"],"affiliations":[{"raw_affiliation_string":"Doshisha University,Japan","institution_ids":["https://openalex.org/I133984924"]},{"raw_affiliation_string":"Doshisha University, Japan","institution_ids":["https://openalex.org/I133984924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113959532","display_name":"Yuki Okamoto","orcid":null},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki Okamoto","raw_affiliation_strings":["Ritsumeikan University,Japan","Ritsumeikan University, Japan"],"affiliations":[{"raw_affiliation_string":"Ritsumeikan University,Japan","institution_ids":["https://openalex.org/I135768898"]},{"raw_affiliation_string":"Ritsumeikan University, Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028262563","display_name":"Kazuki Fujii","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuki Fujii","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003814223","display_name":"Hiroshi Saruwatari","orcid":"https://orcid.org/0000-0003-0876-5617"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Saruwatari","raw_affiliation_strings":["The University of Tokyo,Japan","The University of Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5031777827"],"corresponding_institution_ids":["https://openalex.org/I4210142609","https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":1.2226,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.78729264,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9592000246047974,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/onomatopoeia","display_name":"Onomatopoeia","score":0.9790857434272766},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6853646636009216},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.6428304314613342},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5530135631561279},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.48837774991989136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46117058396339417},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3652702569961548},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.33450984954833984},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.3036755323410034}],"concepts":[{"id":"https://openalex.org/C22432085","wikidata":"https://www.wikidata.org/wiki/Q170239","display_name":"Onomatopoeia","level":2,"score":0.9790857434272766},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6853646636009216},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.6428304314613342},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5530135631561279},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48837774991989136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46117058396339417},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3652702569961548},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.33450984954833984},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.3036755323410034},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096517","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096517","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W20241248","https://openalex.org/W1969746716","https://openalex.org/W2021036184","https://openalex.org/W2058341666","https://openalex.org/W2314423504","https://openalex.org/W2935170919","https://openalex.org/W2956454433","https://openalex.org/W2963609956","https://openalex.org/W2963807156","https://openalex.org/W2964345931","https://openalex.org/W2983429428","https://openalex.org/W3007100714","https://openalex.org/W3033411150","https://openalex.org/W3041523532","https://openalex.org/W3092028330","https://openalex.org/W3094502228","https://openalex.org/W3127705815","https://openalex.org/W3161704465","https://openalex.org/W3166396011","https://openalex.org/W3214281017","https://openalex.org/W4213207493","https://openalex.org/W4300980117","https://openalex.org/W4312817644","https://openalex.org/W4319862704","https://openalex.org/W4367359628","https://openalex.org/W6600817012","https://openalex.org/W6736996214","https://openalex.org/W6778823374","https://openalex.org/W6780837575","https://openalex.org/W6783867762","https://openalex.org/W6784333009","https://openalex.org/W6791353385","https://openalex.org/W6799028840","https://openalex.org/W6810810045","https://openalex.org/W6839857308","https://openalex.org/W6845479124"],"related_works":["https://openalex.org/W2315379384","https://openalex.org/W3197205159","https://openalex.org/W2313695185","https://openalex.org/W4280511947","https://openalex.org/W2369458119","https://openalex.org/W2767010592","https://openalex.org/W2327292841","https://openalex.org/W2993672003","https://openalex.org/W2560876968","https://openalex.org/W4372258428"],"abstract_inverted_index":{"We":[0,127],"propose":[1,100,129],"a":[2,18,22,80,130],"method":[3,111,133],"for":[4,103],"synthesizing":[5],"environmental":[6,40,104,157],"sounds":[7,41,54,158],"from":[8,42,107,159],"visually":[9],"represented":[10],"onomatopoeias":[11,139],"and":[12,58,120,162],"sound":[13,23,105],"sources.":[14],"An":[15,147],"onomatopoeia":[16,45,63],"is":[17,72,92],"word":[19],"that":[20,71,151],"imitates":[21],"structure,":[24],"i.e.,":[25],"the":[26,43,76,84,87,117,124,136,142,152],"text":[27,65,119,161],"representation":[28,91],"of":[29,53,66,83,89,116,138,144],"sound.":[30,126],"From":[31],"this":[32,90],"perspective,":[33],"onoma-to-wave":[34,102],"has":[35],"been":[36],"proposed":[37],"to":[38,94,123,140],"synthesize":[39,95,155],"desired":[44],"texts.":[46],"Onomatopoeias":[47],"have":[48],"another":[49],"representation:":[50],"visual-text":[51],"representations":[52],"in":[55,75],"comics,":[56],"advertisements,":[57],"virtual":[59],"reality.":[60],"A":[61],"visual":[62,101,108,114,118,160],"(visual":[64],"onomatopoeia)":[67],"contains":[68],"rich":[69],"information":[70],"not":[73],"present":[74],"text,":[77],"such":[78],"as":[79],"long-short":[81],"duration":[82],"image,":[85],"so":[86],"use":[88],"expected":[93],"diverse":[96,156],"sounds.":[97],"Therefore,":[98],"we":[99],"synthesis":[106],"onomatopoeia.":[109],"The":[110],"can":[112,154],"transfer":[113],"concepts":[115],"sound-source":[121,163],"image":[122],"synthesized":[125],"also":[128],"data":[131],"augmentation":[132],"focusing":[134],"on":[135],"repetition":[137],"enhance":[141],"performance":[143],"our":[145],"method.":[146],"experimental":[148],"evaluation":[149],"shows":[150],"methods":[153],"images.":[164]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
