{"id":"https://openalex.org/W7136159993","doi":"https://doi.org/10.48550/arxiv.2603.13057","title":"Reference-Free Image Quality Assessment for Virtual Try-On via Human Feedback","display_name":"Reference-Free Image Quality Assessment for Virtual Try-On via Human Feedback","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7136159993","doi":"https://doi.org/10.48550/arxiv.2603.13057"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13057","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13057","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13057","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109032670","display_name":"Yuki Hirakawa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hirakawa, Yuki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129590552","display_name":"Takashi Wada","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wada, Takashi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129548154","display_name":"Ryotaro Shimizu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shimizu, Ryotaro","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078581954","display_name":"Takuya Furusawa","orcid":"https://orcid.org/0000-0001-9521-6514"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Furusawa, Takuya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129572877","display_name":"Yuki Saito","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saito, Yuki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072874208","display_name":"Ryosuke Araki","orcid":"https://orcid.org/0000-0001-5935-6587"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Araki, Ryosuke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129501446","display_name":"Tianwei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Tianwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129520287","display_name":"Fan Mo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mo, Fan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070908826","display_name":"Yoshimitsu Aoki","orcid":"https://orcid.org/0000-0001-7361-0027"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aoki, Yoshimitsu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.6510000228881836,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.6510000228881836,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.20909999310970306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0568000003695488,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.6104000210762024},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5819000005722046},{"id":"https://openalex.org/keywords/image-quality","display_name":"Image quality","score":0.49000000953674316},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4731000065803528},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4657000005245209},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4034999907016754},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.38190001249313354},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.3634999990463257}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6786999702453613},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.6104000210762024},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5910000205039978},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5819000005722046},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.49000000953674316},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.47530001401901245},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4731000065803528},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4657000005245209},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4034999907016754},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40220001339912415},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3828999996185303},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.38190001249313354},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.351500004529953},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3463999927043915},{"id":"https://openalex.org/C51970089","wikidata":"https://www.wikidata.org/wiki/Q44415","display_name":"Virtual image","level":2,"score":0.32749998569488525},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.32739999890327454},{"id":"https://openalex.org/C2779346075","wikidata":"https://www.wikidata.org/wiki/Q7268763","display_name":"Quality Score","level":3,"score":0.29440000653266907},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C150303390","wikidata":"https://www.wikidata.org/wiki/Q1983852","display_name":"Virtual actor","level":3,"score":0.26010000705718994},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.257999986410141}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13057","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13057","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13057","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13057","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Given":[0],"a":[1,5,13,44,108,128,204,229],"person":[2,18,55],"image":[3,15],"and":[4,77,84,142,180,209],"garment":[6,59,178],"image,":[7],"image-based":[8],"Virtual":[9,105],"Try-ON":[10],"(VTON)":[11],"synthesizes":[12],"try-on":[14,134,173],"of":[16,38,52,91,154,183,233],"the":[17,20,53,57,88,152,159,181,212],"wearing":[19,56],"target":[21,58],"garment.":[22],"As":[23],"VTON":[24,140,236],"systems":[25],"become":[26],"increasingly":[27],"important":[28],"in":[29,168,211],"practical":[30],"applications":[31],"such":[32,72,189],"as":[33,43,73],"fashion":[34],"e-commerce,":[35],"reliable":[36,221],"evaluation":[37,65,167,232],"their":[39],"outputs":[40],"has":[41],"emerged":[42],"critical":[45],"challenge.":[46],"In":[47],"real-world":[48],"scenarios,":[49],"ground-truth":[50,118],"images":[51,135],"same":[54],"are":[60],"typically":[61],"unavailable,":[62],"making":[63],"reference-based":[64],"impractical.":[66],"Moreover,":[67,226],"widely":[68],"used":[69],"distribution-level":[70],"metrics":[71],"Fr\u00e9chet":[74],"Inception":[75,79],"Distance":[76,80],"Kernel":[78],"measure":[81],"dataset-level":[82],"similarity":[83],"fail":[85],"to":[86,162],"reflect":[87],"perceptual":[89,123],"quality":[90,114,144,174,224],"individual":[92],"generated":[93,136],"images.":[94,119],"To":[95,120,151,186],"address":[96],"these":[97],"limitations,":[98],"we":[99,125,191,227],"propose":[100],"Image":[101],"Quality":[102],"Assessment":[103],"for":[104,111,164],"Try-On":[106],"(VTON-IQA),":[107],"reference-free":[109],"framework":[110],"human-aligned,":[112],"image-level":[113,223],"assessment":[115],"without":[116],"requiring":[117],"model":[121,188],"human":[122,165],"judgments,":[124],"construct":[126],"VTON-QBench,":[127],"large-scale":[129],"human-annotated":[130],"benchmark":[131,231],"comprising":[132],"62,688":[133],"by":[137,202],"14":[138,234],"representative":[139,235],"models":[141,237],"431,800":[143],"annotations":[145],"collected":[146],"from":[147],"13,838":[148],"qualified":[149],"annotators.":[150],"best":[153],"our":[155],"knowledge,":[156],"this":[157],"is":[158],"largest":[160],"dataset":[161],"date":[163],"subjective":[166],"virtual":[169,172],"try-on.":[170],"Evaluating":[171],"requires":[175],"verifying":[176],"both":[177],"fidelity":[179],"preservation":[182],"person-specific":[184],"details.":[185],"explicitly":[187],"interactions,":[190],"introduce":[192],"an":[193],"Interleaved":[194],"Cross-Attention":[195],"module":[196],"that":[197,218],"extends":[198],"standard":[199],"transformer":[200],"blocks":[201],"inserting":[203],"cross-attention":[205],"layer":[206],"between":[207],"self-attention":[208],"MLP":[210],"latter":[213],"blocks.":[214],"Extensive":[215],"experiments":[216],"show":[217],"VTON-IQA":[219],"achieves":[220],"human-aligned":[222],"prediction.":[225],"conduct":[228],"comprehensive":[230],"using":[238],"VTON-IQA.":[239]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-12T08:23:45.883708","created_date":"2026-03-17T00:00:00"}
