{"id":"https://openalex.org/W4360991078","doi":"https://doi.org/10.1145/3581641.3584083","title":"Evaluating Descriptive Quality of AI-Generated Audio Using Image-Schemas","display_name":"Evaluating Descriptive Quality of AI-Generated Audio Using Image-Schemas","publication_year":2023,"publication_date":"2023-03-27","ids":{"openalex":"https://openalex.org/W4360991078","doi":"https://doi.org/10.1145/3581641.3584083"},"language":"en","primary_location":{"id":"doi:10.1145/3581641.3584083","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581641.3584083","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581641.3584083","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th International Conference on Intelligent User Interfaces","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3581641.3584083","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038084030","display_name":"Purnima Kamath","orcid":"https://orcid.org/0000-0003-0351-6574"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Purnima Kamath","raw_affiliation_strings":["Augmented Human Lab, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Augmented Human Lab, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066542435","display_name":"Zhuoyao Li","orcid":"https://orcid.org/0000-0003-0295-0897"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zhuoyao Li","raw_affiliation_strings":["National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016367573","display_name":"Chitralekha Gupta","orcid":"https://orcid.org/0000-0003-1350-9095"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Chitralekha Gupta","raw_affiliation_strings":["Augmented Human Lab, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Augmented Human Lab, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079154791","display_name":"Kokil Jaidka","orcid":"https://orcid.org/0000-0002-8127-1157"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Kokil Jaidka","raw_affiliation_strings":["National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027647989","display_name":"Suranga Nanayakkara","orcid":"https://orcid.org/0000-0001-7441-5493"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Suranga Nanayakkara","raw_affiliation_strings":["Augmented Human Lab, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Augmented Human Lab, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062692118","display_name":"Lonce Wyse","orcid":"https://orcid.org/0000-0002-9200-1048"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Lonce Wyse","raw_affiliation_strings":["Music Technology Group, University Pompeu Fabra, Spain"],"affiliations":[{"raw_affiliation_string":"Music Technology Group, University Pompeu Fabra, Spain","institution_ids":["https://openalex.org/I170486558"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5038084030"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":1.7387,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.83193824,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"621","last_page":"632"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12032","display_name":"Multisensory perception and integration","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12032","display_name":"Multisensory perception and integration","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7852667570114136},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.7047383189201355},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5656076073646545},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.5562227964401245},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5140105485916138},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.47891414165496826},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.44810691475868225},{"id":"https://openalex.org/keywords/descriptive-statistics","display_name":"Descriptive statistics","score":0.4254835247993469},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33050769567489624},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3264945447444916},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32379674911499023},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.27073872089385986},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.12283685803413391},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10173672437667847}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7852667570114136},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.7047383189201355},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5656076073646545},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.5562227964401245},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5140105485916138},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.47891414165496826},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.44810691475868225},{"id":"https://openalex.org/C39896193","wikidata":"https://www.wikidata.org/wiki/Q380344","display_name":"Descriptive statistics","level":2,"score":0.4254835247993469},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33050769567489624},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3264945447444916},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32379674911499023},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.27073872089385986},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.12283685803413391},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10173672437667847},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581641.3584083","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581641.3584083","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581641.3584083","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th International Conference on Intelligent User Interfaces","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3581641.3584083","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3581641.3584083","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3581641.3584083","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th International Conference on Intelligent User Interfaces","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6299999952316284,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G693569427","display_name":null,"funder_award_id":"Learning Generative Recurrent Neural Networks","funder_id":"https://openalex.org/F4320311649","funder_display_name":"Ministry of Education"}],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320311649","display_name":"Ministry of Education","ror":"https://ror.org/036nq5137"},{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4360991078.pdf","grobid_xml":"https://content.openalex.org/works/W4360991078.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1580980540","https://openalex.org/W1583837637","https://openalex.org/W1606487971","https://openalex.org/W1992160271","https://openalex.org/W2007018772","https://openalex.org/W2032337854","https://openalex.org/W2062663442","https://openalex.org/W2108708552","https://openalex.org/W2187193317","https://openalex.org/W2395291588","https://openalex.org/W2395718496","https://openalex.org/W2405742202","https://openalex.org/W2477677455","https://openalex.org/W2726137762","https://openalex.org/W2774116887","https://openalex.org/W2791686384","https://openalex.org/W2794219599","https://openalex.org/W2799412098","https://openalex.org/W2799791618","https://openalex.org/W2903005299","https://openalex.org/W2938577267","https://openalex.org/W2940678350","https://openalex.org/W2941505948","https://openalex.org/W2943189559","https://openalex.org/W2946474696","https://openalex.org/W2963185411","https://openalex.org/W2972478942","https://openalex.org/W2989955315","https://openalex.org/W3163632208","https://openalex.org/W3194338218","https://openalex.org/W3213080639","https://openalex.org/W4205572715","https://openalex.org/W4225012671","https://openalex.org/W4226409935","https://openalex.org/W6949048490"],"related_works":["https://openalex.org/W3032998312","https://openalex.org/W135177976","https://openalex.org/W4384486036","https://openalex.org/W1503094549","https://openalex.org/W2337920774","https://openalex.org/W4286908577","https://openalex.org/W2886410948","https://openalex.org/W2025875869","https://openalex.org/W4318823662","https://openalex.org/W3207526114"],"abstract_inverted_index":{"Novel":[0],"AI-generated":[1,66,129,152],"audio":[2,36,79,107,130,153],"samples":[3],"are":[4],"evaluated":[5],"for":[6,27,140],"descriptive":[7,35,78],"qualities":[8,150],"such":[9,28,84],"as":[10],"the":[11,22,34,45,54,71,101,124,135],"smoothness":[12],"of":[13,56,59,73,96,103,128,137,151],"a":[14,77],"morph":[15],"using":[16,83,119],"crowdsourced":[17,160],"human":[18],"listening":[19,141],"tests.":[20],"However,":[21],"methods":[23],"to":[24,31,61,64,98,147,156],"design":[25,62,139],"interfaces":[26,63],"experiments":[29,97],"and":[30,75,90,108,126,143],"effectively":[32],"articulate":[33],"quality":[37,80,102,125],"under":[38,81],"test":[39],"receive":[40],"very":[41],"little":[42],"attention":[43],"in":[44,115],"evaluation":[46],"metrics":[47],"literature.":[48],"In":[49],"this":[50],"paper,":[51],"we":[52,69,92,121],"explore":[53],"use":[55],"visual":[57,145],"metaphors":[58],"image-schema":[60],"evaluate":[65],"audio.":[67],"Furthermore,":[68],"highlight":[70],"importance":[72,136],"framing":[74],"contextualizing":[76],"measurement":[82],"constructs.":[85],"Using":[86],"both":[87,116],"pitched":[88],"sounds":[89],"textures,":[91],"conduct":[93],"two":[94],"sets":[95],"investigate":[99],"how":[100],"responses":[104],"vary":[105],"with":[106],"task":[109],"complexities.":[110],"Our":[111,132],"results":[112],"show":[113],"that,":[114],"cases,":[117],"by":[118],"image-schemas":[120],"can":[122],"improve":[123],"consensus":[127],"evaluations.":[131],"findings":[133],"reinforce":[134],"interface":[138],"tests":[142],"stationary":[144],"constructs":[146],"communicate":[148],"temporal":[149],"samples,":[154],"especially":[155],"na\u00efve":[157],"listeners":[158],"on":[159],"platforms.":[161]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
