{"id":"https://openalex.org/W7156635947","doi":"https://doi.org/10.48550/arxiv.2604.22841","title":"ATTN-FIQA: Interpretable Attention-based Face Image Quality Assessment with Vision Transformers","display_name":"ATTN-FIQA: Interpretable Attention-based Face Image Quality Assessment with Vision Transformers","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7156635947","doi":"https://doi.org/10.48550/arxiv.2604.22841"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.22841","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22841","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.22841","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115792474","display_name":"Guray Ozgur","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ozgur, Guray","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106356516","display_name":"Tahar Chettaoui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chettaoui, Tahar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134759351","display_name":"Eduarda Caldeira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caldeira, Eduarda","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069065641","display_name":"Jan Niklas Kolf","orcid":"https://orcid.org/0000-0002-0037-5334"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kolf, Jan Niklas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102902943","display_name":"Marco Huber","orcid":"https://orcid.org/0000-0003-3413-6291"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huber, Marco","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000611305","display_name":"Andrea Atzori","orcid":"https://orcid.org/0000-0002-6910-206X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Atzori, Andrea","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134798629","display_name":"Naser Damer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Damer, Naser","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5028035125","display_name":"Fadi Boutros","orcid":"https://orcid.org/0000-0003-4516-9128"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boutros, Fadi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5115792474"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.8007000088691711,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.8007000088691711,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.051500000059604645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.03189999982714653,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7835999727249146},{"id":"https://openalex.org/keywords/facial-recognition-system","display_name":"Facial recognition system","score":0.586899995803833},{"id":"https://openalex.org/keywords/image-quality","display_name":"Image quality","score":0.5526999831199646},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5044000148773193},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4799000024795532},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46560001373291016},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.439300000667572},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.41830000281333923}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7835999727249146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7246999740600586},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7091000080108643},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.586899995803833},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.5526999831199646},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5044000148773193},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.49900001287460327},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4799000024795532},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46560001373291016},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.439300000667572},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.41830000281333923},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3978999853134155},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37560001015663147},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.34529998898506165},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3231000006198883},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3091000020503998},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2842000126838684},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.2799000144004822},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2678999900817871},{"id":"https://openalex.org/C88799230","wikidata":"https://www.wikidata.org/wiki/Q3398329","display_name":"Three-dimensional face recognition","level":5,"score":0.2653999924659729}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.22841","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22841","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.22841","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22841","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7358390688896179,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Face":[0],"Image":[1],"Quality":[2],"Assessment":[3],"(FIQA)":[4],"aims":[5],"to":[6,199],"assess":[7],"the":[8,45,130],"recognition":[9,20,87],"utility":[10],"of":[11,47],"face":[12,19,86,186],"samples":[13],"and":[14,38,141,172,189],"is":[15],"essential":[16],"for":[17],"reliable":[18],"(FR)":[21],"systems.":[22],"Existing":[23],"approaches":[24],"require":[25],"computationally":[26],"expensive":[27],"procedures":[28],"such":[29],"as":[30,58,91],"multiple":[31],"forward":[32,153],"passes,":[33],"backpropagation,":[34,161],"or":[35,162],"additional":[36,163],"training,":[37],"only":[39,150],"recent":[40],"work":[41,69,177],"has":[42],"focused":[43],"on":[44],"use":[46],"Vision":[48,84],"Transformers.":[49],"Recent":[50],"studies":[51],"highlighted":[52],"that":[53,76,96,179],"these":[54],"architectures":[55],"inherently":[56],"function":[57],"saliency":[59],"learners":[60],"with":[61,104,185],"attention":[62,80,97,115,127,136],"patterns":[63],"naturally":[64],"encoding":[65],"spatial":[66,191],"importance.":[67],"This":[68],"proposes":[70],"ATTN-FIQA,":[71],"a":[72,151],"novel":[73],"training-free":[74],"approach":[75],"investigates":[77],"whether":[78],"pre-softmax":[79,126],"scores":[81,145,182],"from":[82,129],"pre-trained":[83,156],"Transformer-based":[85],"models":[88,157],"can":[89],"serve":[90],"quality":[92,144,181,188,200],"indicators.":[93],"We":[94],"hypothesize":[95],"magnitudes":[98],"intrinsically":[99],"encode":[100],"quality:":[101],"high-quality":[102],"images":[103,119],"discriminative":[105],"facial":[106,195],"features":[107],"enable":[108],"strong":[109],"query-key":[110],"alignments":[111],"producing":[112],"focused,":[113],"high-magnitude":[114],"patterns,":[116],"while":[117],"degraded":[118],"generate":[120],"diffuse,":[121],"low-magnitude":[122],"patterns.":[123],"ATTN-FIQA":[124],"extracts":[125],"matrices":[128],"final":[131],"transformer":[132],"block,":[133],"aggregate":[134],"multi-head":[135],"information":[137],"across":[138,168],"all":[139],"patches,":[140],"compute":[142],"image-level":[143],"through":[146,155],"simple":[147],"averaging,":[148],"requiring":[149],"single":[152],"pass":[154],"without":[158],"architectural":[159],"modifications,":[160],"training.":[164],"Through":[165],"comprehensive":[166],"evaluation":[167],"eight":[169],"benchmark":[170],"datasets":[171],"four":[173],"FR":[174],"models,":[175],"this":[176],"demonstrates":[178],"attention-based":[180],"effectively":[183],"correlate":[184],"image":[187],"provide":[190],"interpretability,":[192],"revealing":[193],"which":[194],"regions":[196],"contribute":[197],"most":[198],"determination.":[201]},"counts_by_year":[],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2026-04-29T00:00:00"}
