{"id":"https://openalex.org/W4408354393","doi":"https://doi.org/10.1109/icassp49660.2025.10889173","title":"HAPG-SAQAM: Human Auditory Perception Guided Spatial Audio Quality Assessment Metric","display_name":"HAPG-SAQAM: Human Auditory Perception Guided Spatial Audio Quality Assessment Metric","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408354393","doi":"https://doi.org/10.1109/icassp49660.2025.10889173"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889173","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yuanming Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanming Zheng","raw_affiliation_strings":["Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiaxuan Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxuan Yao","raw_affiliation_strings":["Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031249202","display_name":"Xiangyu Deng","orcid":"https://orcid.org/0000-0002-7251-2529"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Deng","raw_affiliation_strings":["Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102859655","display_name":"Yuhong Yang","orcid":"https://orcid.org/0000-0003-3001-7957"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhong Yang","raw_affiliation_strings":["Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050124324","display_name":"Ruiqi Liao","orcid":"https://orcid.org/0000-0002-9553-3349"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruiqi Liao","raw_affiliation_strings":["Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041664453","display_name":"Weiping Tu","orcid":"https://orcid.org/0000-0002-6933-3298"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiping Tu","raw_affiliation_strings":["Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan University,National Engineering Research Center for Multimedia Software, School of Computer Science,Wuhan,China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113361554","display_name":"Cedar Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cedar Lin","raw_affiliation_strings":["Guangdong OPPO Mobile Telecommunications Corp.,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong OPPO Mobile Telecommunications Corp.,China","institution_ids":["https://openalex.org/I180662265"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1339,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.8487316,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9239000082015991,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9239000082015991,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9126999974250793,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7123326063156128},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6841265559196472},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6742819547653198},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.6627099514007568},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4775558114051819},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4693974554538727},{"id":"https://openalex.org/keywords/auditory-display","display_name":"Auditory display","score":0.44773218035697937},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14460489153862},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09108290076255798}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7123326063156128},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6841265559196472},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6742819547653198},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.6627099514007568},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4775558114051819},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4693974554538727},{"id":"https://openalex.org/C171179263","wikidata":"https://www.wikidata.org/wiki/Q4820026","display_name":"Auditory display","level":2,"score":0.44773218035697937},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14460489153862},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09108290076255798},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889173","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1561928502","https://openalex.org/W1975517671","https://openalex.org/W2009649672","https://openalex.org/W2075546475","https://openalex.org/W2136682440","https://openalex.org/W2143156057","https://openalex.org/W2176625348","https://openalex.org/W2231730744","https://openalex.org/W2606611007","https://openalex.org/W2768337397","https://openalex.org/W2940285530","https://openalex.org/W2961183107","https://openalex.org/W2963775347","https://openalex.org/W2985229398","https://openalex.org/W3097906045","https://openalex.org/W3097934054","https://openalex.org/W3120954100","https://openalex.org/W3129121609","https://openalex.org/W4205225513","https://openalex.org/W4296068605","https://openalex.org/W4385822681","https://openalex.org/W4392909338","https://openalex.org/W6610119711","https://openalex.org/W6629412591","https://openalex.org/W6631116650","https://openalex.org/W6631943919","https://openalex.org/W6638676062","https://openalex.org/W6639363673","https://openalex.org/W6712077227","https://openalex.org/W6763156250","https://openalex.org/W6763613002","https://openalex.org/W6765123463","https://openalex.org/W6803345608"],"related_works":["https://openalex.org/W2037001540","https://openalex.org/W2628861693","https://openalex.org/W3203087560","https://openalex.org/W4361279463","https://openalex.org/W4232814730","https://openalex.org/W2975814312","https://openalex.org/W4387697615","https://openalex.org/W2149396112","https://openalex.org/W2622965534","https://openalex.org/W121534091"],"abstract_inverted_index":{"Spatial":[0],"audio":[1,141],"quality":[2,50,92,96],"evaluation":[3],"is":[4],"essential":[5],"for":[6,42,76],"applications":[7],"like":[8],"virtual":[9],"and":[10,31,48,94,104,139],"augmented":[11],"reality,":[12],"where":[13],"accurate":[14],"sound":[15],"reproduction":[16],"enhances":[17],"user":[18],"immersion.":[19],"While":[20],"subjective":[21,101,123],"listening":[22],"tests":[23],"are":[24,29],"the":[25,63,88,131],"gold":[26],"standard,":[27],"they":[28],"costly":[30],"time-consuming.":[32],"To":[33],"address":[34],"this,":[35],"we":[36],"propose":[37],"HAPG-SAQAM,":[38],"an":[39],"objective":[40],"metric":[41],"assessing":[43],"timbre":[44,91],"quality,":[45,47],"spatial":[46,95,138],"overall":[49,140],"of":[51,90,133],"binaural":[52],"audio,":[53],"guided":[54],"by":[55,112,125],"human":[56,80],"auditory":[57],"perception.":[58],"Our":[59],"contributions":[60,132],"include:":[61],"(1)":[62],"Multi-scale":[64],"Auditory":[65],"Guided":[66],"Feature":[67],"Extraction":[68],"(MAGFE)":[69],"module,":[70],"incorporating":[71],"gammatone":[72],"frequency":[73],"cepstral":[74],"coefficients":[75],"better":[77],"alignment":[78],"with":[79,122,127],"perception;":[81],"(2)":[82],"Perceptual":[83],"Weighted":[84],"Loss":[85],"(PWL),":[86],"optimizing":[87],"weighting":[89],"(TQ)":[93],"(SQ)":[97],"loss":[98],"based":[99],"on":[100],"test":[102],"data;":[103],"(3)":[105],"data":[106],"augmentation":[107],"techniques":[108],"to":[109,136],"enhance":[110],"robustness":[111],"amplifying":[113],"perceptual":[114],"distortions.":[115],"Experimental":[116],"results":[117],"show":[118],"HAPG-SAQAM":[119],"improves":[120],"correlation":[121],"scores":[124],"10%,":[126],"ablation":[128],"studies":[129],"confirming":[130],"its":[134],"components":[135],"enhanced":[137],"quality.":[142]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
