{"id":"https://openalex.org/W7148359692","doi":"https://doi.org/10.1109/asru65441.2025.11434625","title":"QAMRO: Quality-aware Adaptive Margin Ranking Optimization for Human-aligned Assessment of Audio Generation Systems","display_name":"QAMRO: Quality-aware Adaptive Margin Ranking Optimization for Human-aligned Assessment of Audio Generation Systems","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148359692","doi":"https://doi.org/10.1109/asru65441.2025.11434625"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434625","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434625","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100337614","display_name":"C.M. Wang","orcid":"https://orcid.org/0000-0002-7604-1744"},"institutions":[{"id":"https://openalex.org/I134161618","display_name":"National Taiwan Normal University","ror":"https://ror.org/059dkdx38","country_code":"TW","type":"education","lineage":["https://openalex.org/I134161618"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Chien-Chun Wang","raw_affiliation_strings":["National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan","institution_ids":["https://openalex.org/I134161618"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102841009","display_name":"Kui Huang","orcid":"https://orcid.org/0009-0005-5411-581X"},"institutions":[{"id":"https://openalex.org/I134161618","display_name":"National Taiwan Normal University","ror":"https://ror.org/059dkdx38","country_code":"TW","type":"education","lineage":["https://openalex.org/I134161618"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Kuan-Tang Huang","raw_affiliation_strings":["National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan","institution_ids":["https://openalex.org/I134161618"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042112521","display_name":"Cheng-Yeh Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I134161618","display_name":"National Taiwan Normal University","ror":"https://ror.org/059dkdx38","country_code":"TW","type":"education","lineage":["https://openalex.org/I134161618"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Cheng-Yeh Yang","raw_affiliation_strings":["National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan","institution_ids":["https://openalex.org/I134161618"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048338308","display_name":"Hung-Shin Lee","orcid":"https://orcid.org/0000-0001-7044-9434"},"institutions":[{"id":"https://openalex.org/I4210136767","display_name":"Link Institut (Switzerland)","ror":"https://ror.org/04vhpaz11","country_code":"CH","type":"company","lineage":["https://openalex.org/I4210136767"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Hung-Shin Lee","raw_affiliation_strings":["United Link Co., Ltd,Taiwan"],"affiliations":[{"raw_affiliation_string":"United Link Co., Ltd,Taiwan","institution_ids":["https://openalex.org/I4210136767"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132804366","display_name":"Hsin-Min Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210130467","display_name":"Institute of Taiwan History, Academia Sinica","ror":"https://ror.org/02vf41z82","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210130467","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsin-Min Wang","raw_affiliation_strings":["Institute of Computer Science Academia Sinica,Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Science Academia Sinica,Taiwan","institution_ids":["https://openalex.org/I4210130467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5132800680","display_name":"Berlin Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I134161618","display_name":"National Taiwan Normal University","ror":"https://ror.org/059dkdx38","country_code":"TW","type":"education","lineage":["https://openalex.org/I134161618"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Berlin Chen","raw_affiliation_strings":["National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan Normal University,Dept. Computer Science and Information Engineering,Taiwan","institution_ids":["https://openalex.org/I134161618"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100337614"],"corresponding_institution_ids":["https://openalex.org/I134161618"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75408603,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.23919999599456787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.23919999599456787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.1914999932050705,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.07500000298023224,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.8496999740600586},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.7213000059127808},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.5720000267028809},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5098000168800354},{"id":"https://openalex.org/keywords/regression-analysis","display_name":"Regression analysis","score":0.43160000443458557},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.41130000352859497},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.3481999933719635}],"concepts":[{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.8496999740600586},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.7213000059127808},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.717199981212616},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6435999870300293},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6104000210762024},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.5720000267028809},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5098000168800354},{"id":"https://openalex.org/C152877465","wikidata":"https://www.wikidata.org/wiki/Q208042","display_name":"Regression analysis","level":2,"score":0.43160000443458557},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.41130000352859497},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39070001244544983},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.3481999933719635},{"id":"https://openalex.org/C48921125","wikidata":"https://www.wikidata.org/wiki/Q10861030","display_name":"Linear regression","level":2,"score":0.3154999911785126},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.288100004196167},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.28769999742507935},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2574999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434625","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434625","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4357427656650543,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2108862644","https://openalex.org/W2128877075","https://openalex.org/W2921310091","https://openalex.org/W2982693525","https://openalex.org/W2990171858","https://openalex.org/W3035134467","https://openalex.org/W3198270377","https://openalex.org/W3202278141","https://openalex.org/W4214494681","https://openalex.org/W4225302959","https://openalex.org/W4225956675","https://openalex.org/W4249736682","https://openalex.org/W4372259826","https://openalex.org/W4372260310","https://openalex.org/W4377140494","https://openalex.org/W4384519174","https://openalex.org/W4385568035","https://openalex.org/W4404534721","https://openalex.org/W4408353668","https://openalex.org/W4412945617","https://openalex.org/W4417427449"],"related_works":[],"abstract_inverted_index":{"Evaluating":[0],"audio":[1],"generation":[2],"systems,":[3],"including":[4],"text-to-music":[5],"(TTM),":[6],"text-to-speech":[7],"(TTS),":[8],"and":[9,18,74,87,89],"text-to-audio":[10],"(TTA),":[11],"remains":[12],"challenging":[13],"due":[14],"to":[15,70],"the":[16,41,94],"subjective":[17],"multi-dimensional":[19],"nature":[20],"of":[21,43],"human":[22,105],"perception.":[23],"Existing":[24],"methods":[25],"treat":[26],"mean":[27],"opinion":[28],"score":[29],"(MOS)":[30],"prediction":[31],"as":[32,85],"a":[33,53],"regression":[34,38,64],"problem,":[35],"but":[36],"standard":[37],"losses":[39],"overlook":[40],"relativity":[42],"perceptual":[44,72],"judgments.":[45],"To":[46],"address":[47],"this":[48],"limitation,":[49],"we":[50],"introduce":[51],"QAMRO,":[52],"novel":[54],"Quality-aware":[55],"Adaptive":[56],"Margin":[57],"Ranking":[58],"Optimization":[59],"framework":[60,79],"that":[61],"seamlessly":[62],"integrates":[63],"objectives":[65],"from":[66],"different":[67],"perspectives,":[68],"aiming":[69],"highlight":[71],"differences":[73],"prioritize":[75],"accurate":[76],"ratings.":[77],"Our":[78],"leverages":[80],"pre-trained":[81],"audio-text":[82],"models":[83],"such":[84],"CLAP":[86],"Audiobox-Aesthetics,":[88],"is":[90],"trained":[91],"exclusively":[92],"on":[93],"official":[95],"AudioMOS":[96],"Challenge":[97],"2025":[98],"dataset.":[99],"It":[100],"demonstrates":[101],"superior":[102],"alignment":[103],"with":[104],"evaluations":[106],"across":[107],"all":[108],"dimensions,":[109],"significantly":[110],"outperforming":[111],"robust":[112],"baseline":[113],"models.":[114]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
