{"id":"https://openalex.org/W4403922698","doi":"https://doi.org/10.1145/3686215.3688381","title":"Gaze-Informed Vision Transformers: Predicting Driving Decisions Under Uncertainty","display_name":"Gaze-Informed Vision Transformers: Predicting Driving Decisions Under Uncertainty","publication_year":2024,"publication_date":"2024-10-30","ids":{"openalex":"https://openalex.org/W4403922698","doi":"https://doi.org/10.1145/3686215.3688381"},"language":"en","primary_location":{"id":"doi:10.1145/3686215.3688381","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3686215.3688381","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 26th International Conference on Multimodal Interaction","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071103401","display_name":"Sharath Koorathota","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sharath Koorathota","raw_affiliation_strings":["Columbia University, USA"],"raw_orcid":"https://orcid.org/0000-0003-2014-424X","affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103333689","display_name":"\u039d\u03b9\u03ba\u03cc\u03bb\u03b1\u03c2 \u03a0\u03b1\u03c0\u03b1\u03b4\u03cc\u03c0\u03bf\u03c5\u03bb\u03bf\u03c2","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nikolas Papadopoulos","raw_affiliation_strings":["Columbia University, USA"],"raw_orcid":"https://orcid.org/0009-0001-1082-5140","affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jia Li Ma","orcid":"https://orcid.org/0009-0000-0701-2870"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jia Li Ma","raw_affiliation_strings":["Columbia University, USA"],"raw_orcid":"https://orcid.org/0009-0000-0701-2870","affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101238897","display_name":"Shruti Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shruti Kumar","raw_affiliation_strings":["Columbia University, USA"],"raw_orcid":"https://orcid.org/0009-0003-9987-564X","affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063311546","display_name":"Xiaoxiao Sun","orcid":"https://orcid.org/0000-0002-0823-8713"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoxiao Sun","raw_affiliation_strings":["Columbia University, USA"],"raw_orcid":"https://orcid.org/0000-0002-0823-8713","affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034523042","display_name":"Arunesh Mittal","orcid":"https://orcid.org/0000-0002-5160-7780"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arunesh Mittal","raw_affiliation_strings":["Columbia University, USA"],"raw_orcid":"https://orcid.org/0000-0002-5160-7780","affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058115516","display_name":"Patrick Adelman","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Patrick Adelman","raw_affiliation_strings":["Georgia Institute of Technology, USA"],"raw_orcid":"https://orcid.org/0009-0008-3534-1848","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042749874","display_name":"Paul Sajda","orcid":"https://orcid.org/0000-0002-9738-1342"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Paul Sajda","raw_affiliation_strings":["Columbia University, USA"],"raw_orcid":"https://orcid.org/0000-0002-9738-1342","affiliations":[{"raw_affiliation_string":"Columbia University, USA","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5071103401"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":1.3729,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.81861042,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"184","last_page":"194"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10525","display_name":"Human-Automation Interaction and Safety","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10427","display_name":"Visual perception and processing mechanisms","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gaze","display_name":"Gaze","score":0.8798022270202637},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6284810900688171},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5324110984802246},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5312730073928833},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5228438973426819},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3635370135307312},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1892751157283783},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07850903272628784}],"concepts":[{"id":"https://openalex.org/C2779916870","wikidata":"https://www.wikidata.org/wiki/Q14467155","display_name":"Gaze","level":2,"score":0.8798022270202637},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6284810900688171},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5324110984802246},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5312730073928833},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5228438973426819},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3635370135307312},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1892751157283783},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07850903272628784},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3686215.3688381","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3686215.3688381","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 26th International Conference on Multimodal Interaction","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4227104920","display_name":null,"funder_award_id":"N00014- 20-1-2027","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G6460295762","display_name":null,"funder_award_id":"W911NF-23-2-0067","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G6791443481","display_name":null,"funder_award_id":"FA9550-22-1-0337","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1993091346","https://openalex.org/W2023718514","https://openalex.org/W2050579567","https://openalex.org/W2072344378","https://openalex.org/W2088335308","https://openalex.org/W2093449404","https://openalex.org/W2132578666","https://openalex.org/W2144214580","https://openalex.org/W2474210745","https://openalex.org/W2606499982","https://openalex.org/W2765767541","https://openalex.org/W2886189758","https://openalex.org/W3016170110","https://openalex.org/W3094502228","https://openalex.org/W3106388706","https://openalex.org/W3161120562","https://openalex.org/W3202670445","https://openalex.org/W4224281996","https://openalex.org/W4297812995","https://openalex.org/W4382599796","https://openalex.org/W4392309207"],"related_works":["https://openalex.org/W2385108104","https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747"],"abstract_inverted_index":{"Vision":[0],"Transformers":[1],"(ViT)":[2],"have":[3],"advanced":[4],"computer":[5],"vision,":[6],"yet":[7],"their":[8],"efficacy":[9],"in":[10,34,39,55,61,102,161],"complex":[11,169],"tasks":[12],"like":[13],"driving":[14,35,57],"remains":[15],"less":[16],"explored.":[17],"This":[18,92,146],"study":[19],"enhances":[20],"ViT":[21,67,78,123],"by":[22],"integrating":[23],"human":[24,52,63,74,143],"eye":[25,53],"gaze,":[26],"captured":[27],"via":[28],"eye-tracking,":[29],"to":[30,168],"increase":[31],"prediction":[32],"accuracy":[33],"scenarios":[36],"under":[37,125],"uncertainty":[38,127],"both":[40,62],"real-world":[41],"and":[42,65,77,90],"virtual":[43],"reality":[44],"scenarios.":[45],"First,":[46],"we":[47,81],"establish":[48],"the":[49,71,83,100,111],"significance":[50],"of":[51,85],"gaze":[54,144],"left-right":[56],"decisions,":[58],"as":[59,156,158],"observed":[60],"subjects":[64],"a":[66,116],"model.":[68],"By":[69],"comparing":[70],"similarity":[72],"between":[73],"fixation":[75,96],"maps":[76],"attention":[79,105,141],"weights,":[80],"reveal":[82],"dynamics":[84],"overlap":[86,93],"across":[87],"individual":[88],"heads":[89],"layers.":[91],"demonstrates":[94],"that":[95,120,132],"data":[97],"can":[98],"guide":[99],"model":[101],"distributing":[103],"its":[104,140],"weights":[106],"more":[107],"effectively.":[108],"We":[109],"introduce":[110],"fixation-attention":[112],"intersection":[113],"(FAX)":[114],"loss,":[115,138],"novel":[117],"loss":[118],"function":[119],"significantly":[121],"improves":[122],"performance":[124],"high":[126],"conditions.":[128],"Our":[129],"results":[130],"show":[131],"ViT,":[133],"when":[134],"trained":[135],"with":[136,142],"FAX":[137],"aligns":[139],"patterns.":[145],"gaze-informed":[147],"approach":[148],"has":[149],"significant":[150],"potential":[151],"for":[152],"driver":[153],"behavior":[154],"analysis,":[155],"well":[157],"broader":[159],"applications":[160],"human-centered":[162],"AI":[163],"systems,":[164],"extending":[165],"ViT\u2019s":[166],"use":[167],"visual":[170],"environments.":[171]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
