{"id":"https://openalex.org/W7131228885","doi":"https://doi.org/10.71448/bcds2341-1","title":"Gaze-Supervised Hierarchical Attention Networks for Fine-Grained Visual Classification","display_name":"Gaze-Supervised Hierarchical Attention Networks for Fine-Grained Visual Classification","publication_year":2023,"publication_date":"2023-12-30","ids":{"openalex":"https://openalex.org/W7131228885","doi":"https://doi.org/10.71448/bcds2341-1"},"language":null,"primary_location":{"id":"doi:10.71448/bcds2341-1","is_oa":true,"landing_page_url":"https://doi.org/10.71448/bcds2341-1","pdf_url":null,"source":{"id":"https://openalex.org/S7407053757","display_name":"Bulletin of Computer and Data Sciences","issn_l":"3072-2926","issn":["3072-2926"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bulletin of Computer and Data Sciences","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.71448/bcds2341-1","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Edwin R. Hancock","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Edwin R. Hancock","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34769589,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"4","issue":"1","first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.21400000154972076,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.21400000154972076,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.19220000505447388,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.17399999499320984,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gaze","display_name":"Gaze","score":0.8051999807357788},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7210999727249146},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.604200005531311},{"id":"https://openalex.org/keywords/fixation","display_name":"Fixation (population genetics)","score":0.519599974155426},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5069000124931335},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5019999742507935},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.47519999742507935},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.45509999990463257},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.41819998621940613}],"concepts":[{"id":"https://openalex.org/C2779916870","wikidata":"https://www.wikidata.org/wiki/Q14467155","display_name":"Gaze","level":2,"score":0.8051999807357788},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7301999926567078},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7210999727249146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7138000130653381},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.604200005531311},{"id":"https://openalex.org/C146249460","wikidata":"https://www.wikidata.org/wiki/Q2914991","display_name":"Fixation (population genetics)","level":3,"score":0.519599974155426},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5069000124931335},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5019999742507935},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.47519999742507935},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.45509999990463257},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.41819998621940613},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C2986089797","wikidata":"https://www.wikidata.org/wiki/Q6501338","display_name":"Visual attention","level":3,"score":0.36480000615119934},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.35409998893737793},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3504999876022339},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3386000096797943},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.31200000643730164},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3009999990463257},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.29440000653266907},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.26269999146461487},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.71448/bcds2341-1","is_oa":true,"landing_page_url":"https://doi.org/10.71448/bcds2341-1","pdf_url":null,"source":{"id":"https://openalex.org/S7407053757","display_name":"Bulletin of Computer and Data Sciences","issn_l":"3072-2926","issn":["3072-2926"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bulletin of Computer and Data Sciences","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.71448/bcds2341-1","is_oa":true,"landing_page_url":"https://doi.org/10.71448/bcds2341-1","pdf_url":null,"source":{"id":"https://openalex.org/S7407053757","display_name":"Bulletin of Computer and Data Sciences","issn_l":"3072-2926","issn":["3072-2926"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bulletin of Computer and Data Sciences","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7463819980621338,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Fine-grained":[0],"visual":[1],"classification":[2],"(FGVC)":[3],"becomes":[4],"especially":[5],"challenging":[6],"when":[7],"categories":[8],"are":[9],"organized":[10],"hierarchically":[11],"and":[12,86,100,110,146,157,164,191,217],"the":[13,77,168,182],"discriminative":[14],"cues":[15],"shrink":[16],"from":[17],"global":[18],"shapes":[19],"(order/family)":[20],"to":[21,113],"tiny":[22],"parts":[23],"(genus/species).":[24],"Existing":[25],"hierarchy-aware":[26,158],"methods":[27],"such":[28],"as":[29,41,139,141],"CHRF":[30],"learn":[31],"level-specific":[32],"attentions":[33],"implicitly,":[34],"but":[35,91],"they":[36],"only":[37],"use":[38],"human":[39,70,115,203],"gaze":[40,116,178,195,204],"a":[42,47,59,82,101],"post-hoc":[43],"validation":[44],"signal,":[45],"leaving":[46],"rich":[48],"source":[49],"of":[50,76],"supervision":[51],"unused.":[52],"In":[53],"this":[54],"work":[55],"we":[56],"introduce":[57],"GS-HAN,":[58],"gaze-supervised":[60],"hierarchical":[61,212],"attention":[62,68,98],"network":[63],"that":[64,106,125,150,175,202],"explicitly":[65],"aligns":[66],"model":[67],"with":[69,96,136],"fixation":[71],"patterns":[72],"at":[73,167],"every":[74],"level":[75,95],"taxonomy.":[78],"GS-HAN":[79,151],"builds":[80],"on":[81,134,142,162],"backbone":[83],"feature":[84,89],"extractor":[85],"CHRF-style":[87],"region":[88],"mining,":[90],"augments":[92],"each":[93],"hierarchy":[94],"gaze-conditioned":[97],"heads":[99],"Hierarchical":[102],"Gaze":[103],"Alignment":[104],"Loss":[105],"combines":[107],"KL":[108],"divergence":[109],"cosine":[111],"similarity":[112],"match":[114],"distributions.":[117],"We":[118],"further":[119],"retain":[120],"cross-hierarchical":[121],"orthogonal":[122],"fusion":[123],"so":[124],"coarse-level,":[126],"gaze-aligned":[127],"context":[128],"can":[129],"enhance":[130],"fine-level":[131],"recognition.":[132],"Evaluations":[133],"CUB-200-2011":[135],"ARISTO":[137],"gaze,":[138],"well":[140],"Butterfly-200,":[143],"VegFru,":[144],"FGVC-Aircraft,":[145],"Stanford":[147],"Cars,":[148],"show":[149],"consistently":[152],"outperforms":[153],"strong":[154],"FGVC":[155],"baselines":[156],"methods,":[159],"achieving":[160],"90.8%":[161],"CUB":[163],"clear":[165],"gains":[166],"most":[169],"fine-grained":[170],"(species)":[171],"level.":[172],"Ablations":[173],"verify":[174],"(i)":[176],"direct":[177],"supervision\u2014not":[179],"just":[180],"hierarchy\u2014drives":[181],"improvements,":[183],"(ii)":[184],"our":[185],"loss":[186],"improves":[187],"quantitative":[188],"gaze\u2013attention":[189],"similarity,":[190],"(iii)":[192],"even":[193],"partial":[194],"availability":[196],"yields":[197],"benefits.":[198],"The":[199],"results":[200],"demonstrate":[201],"is":[205],"an":[206],"effective,":[207],"underexploited":[208],"supervisory":[209],"signal":[210],"for":[211],"FGVC,":[213],"improving":[214],"both":[215],"accuracy":[216],"interpretability.":[218]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-02-25T00:00:00"}
