{"id":"https://openalex.org/W7135221863","doi":"https://doi.org/10.48550/arxiv.2603.11482","title":"AnimeScore: A Preference-Based Dataset and Framework for Evaluating Anime-Like Speech Style","display_name":"AnimeScore: A Preference-Based Dataset and Framework for Evaluating Anime-Like Speech Style","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135221863","doi":"https://doi.org/10.48550/arxiv.2603.11482"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11482","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11482","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11482","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109692719","display_name":"Joonyong Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Joonyong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5121294573","display_name":"Jerry Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jerry","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.36489999294281006,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.36489999294281006,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.156700000166893,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.09350000321865082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.7896000146865845},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.7253000140190125},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6707000136375427},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6248999834060669},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.48069998621940613},{"id":"https://openalex.org/keywords/articulation","display_name":"Articulation (sociology)","score":0.45809999108314514},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.45719999074935913},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.4535999894142151}],"concepts":[{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.7896000146865845},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.7253000140190125},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7056999802589417},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6707000136375427},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6248999834060669},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5593000054359436},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5005999803543091},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.48069998621940613},{"id":"https://openalex.org/C2779337067","wikidata":"https://www.wikidata.org/wiki/Q4800961","display_name":"Articulation (sociology)","level":3,"score":0.45809999108314514},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.45719999074935913},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.4535999894142151},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44929999113082886},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4293000102043152},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4140999913215637},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.41119998693466187},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C66029223","wikidata":"https://www.wikidata.org/wiki/Q210847","display_name":"Manner of articulation","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C90697598","wikidata":"https://www.wikidata.org/wiki/Q3657183","display_name":"Objective test","level":3,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11482","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11482","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11482","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11482","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Evaluating":[0],"'anime-like'":[1],"voices":[2],"currently":[3],"relies":[4],"on":[5],"costly":[6],"subjective":[7],"judgments,":[8],"yet":[9],"no":[10],"standardized":[11],"objective":[12],"metric":[13,113],"exists.":[14],"A":[15],"key":[16],"challenge":[17],"is":[18,71],"that":[19,68,92,114],"anime-likeness,":[20],"unlike":[21],"naturalness,":[22],"lacks":[23],"a":[24,43,97,111,119],"shared":[25],"absolute":[26],"scale,":[27],"making":[28],"conventional":[29],"Mean":[30],"Opinion":[31],"Score":[32],"(MOS)":[33],"protocols":[34],"unreliable.":[35],"To":[36],"address":[37],"this":[38],"gap,":[39],"we":[40],"propose":[41],"AnimeScore,":[42],"preference-based":[44,123],"framework":[45],"for":[46,122],"automatic":[47],"anime-likeness":[48,70],"evaluation":[49],"via":[50],"pairwise":[51,56],"ranking.":[52],"We":[53,90],"collect":[54],"15,000":[55],"judgments":[57],"from":[58],"187":[59],"evaluators":[60],"with":[61],"free-form":[62],"descriptions,":[63],"and":[64,79],"acoustic":[65,94],"analysis":[66],"reveals":[67],"perceived":[69],"driven":[72],"by":[73],"controlled":[74],"resonance":[75],"shaping,":[76],"prosodic":[77],"continuity,":[78],"deliberate":[80],"articulation":[81],"rather":[82],"than":[83],"simple":[84],"heuristics":[85],"such":[86],"as":[87,118],"high":[88],"pitch.":[89],"show":[91],"handcrafted":[93],"features":[95],"reach":[96],"69.3%":[98],"AUC":[99],"ceiling,":[100],"while":[101],"SSL-based":[102],"ranking":[103],"models":[104],"achieve":[105],"up":[106],"to":[107],"90.8%":[108],"AUC,":[109],"providing":[110],"practical":[112],"can":[115],"also":[116],"serve":[117],"reward":[120],"signal":[121],"optimization":[124],"of":[125],"generative":[126],"speech":[127],"models.":[128]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-14T00:00:00"}
