{"id":"https://openalex.org/W4394780884","doi":"https://doi.org/10.1145/3649902.3653439","title":"A Transformer-Based Model for the Prediction of Human Gaze Behavior on Videos","display_name":"A Transformer-Based Model for the Prediction of Human Gaze Behavior on Videos","publication_year":2024,"publication_date":"2024-05-31","ids":{"openalex":"https://openalex.org/W4394780884","doi":"https://doi.org/10.1145/3649902.3653439"},"language":"en","primary_location":{"id":"doi:10.1145/3649902.3653439","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3649902.3653439","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 Symposium on Eye Tracking Research and Applications","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2404.07351","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049341278","display_name":"S\u00fcleyman \u00d6zdel","orcid":"https://orcid.org/0000-0002-3390-6154"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"S\u00fcleyman \u00d6zdel","raw_affiliation_strings":["Human-Centered Technologies for Learning, Technical University of Munich, Germany"],"raw_orcid":"https://orcid.org/0000-0002-3390-6154","affiliations":[{"raw_affiliation_string":"Human-Centered Technologies for Learning, Technical University of Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100624647","display_name":"Yao Rong","orcid":"https://orcid.org/0000-0002-6031-3741"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Yao Rong","raw_affiliation_strings":["Technical University of Munich, Germany"],"raw_orcid":"https://orcid.org/0000-0002-6031-3741","affiliations":[{"raw_affiliation_string":"Technical University of Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029685269","display_name":"Berat Mert Albaba","orcid":"https://orcid.org/0000-0002-3406-8412"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Berat Mert Albaba","raw_affiliation_strings":["Department of Computer Science/AIT Lab, ETH Z\u00fcrich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0002-3406-8412","affiliations":[{"raw_affiliation_string":"Department of Computer Science/AIT Lab, ETH Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087296750","display_name":"Yen\u2010Ling Kuo","orcid":"https://orcid.org/0000-0002-6433-6713"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yen-Ling Kuo","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, United States"],"raw_orcid":"https://orcid.org/0000-0002-6433-6713","affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology, United States","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100442218","display_name":"Xi Wang","orcid":"https://orcid.org/0000-0001-5442-1116"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Xi Wang","raw_affiliation_strings":["Department of Computer Science, ETH, Switzerland"],"raw_orcid":"https://orcid.org/0000-0001-5442-1116","affiliations":[{"raw_affiliation_string":"Department of Computer Science, ETH, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008809634","display_name":"Enkelejda Kasneci","orcid":"https://orcid.org/0000-0003-3146-4484"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Enkelejda Kasneci","raw_affiliation_strings":["Human-Centered Technologies for Learning, Technical University of Munich, Germany"],"raw_orcid":"https://orcid.org/0000-0003-3146-4484","affiliations":[{"raw_affiliation_string":"Human-Centered Technologies for Learning, Technical University of Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5049341278"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":1.9998,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.8725777,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gaze","display_name":"Gaze","score":0.7903696298599243},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7107079029083252},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6873748898506165},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48073673248291016},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38734400272369385},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.32815447449684143},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12669238448143005},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.0852007269859314},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07616975903511047}],"concepts":[{"id":"https://openalex.org/C2779916870","wikidata":"https://www.wikidata.org/wiki/Q14467155","display_name":"Gaze","level":2,"score":0.7903696298599243},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7107079029083252},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6873748898506165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48073673248291016},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38734400272369385},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32815447449684143},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12669238448143005},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0852007269859314},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07616975903511047}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3649902.3653439","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3649902.3653439","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 Symposium on Eye Tracking Research and Applications","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2404.07351","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.07351","pdf_url":"https://arxiv.org/pdf/2404.07351","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2404.07351","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.07351","pdf_url":"https://arxiv.org/pdf/2404.07351","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394780884.pdf","grobid_xml":"https://content.openalex.org/works/W4394780884.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W2048959912","https://openalex.org/W2108598243","https://openalex.org/W2136668269","https://openalex.org/W2194775991","https://openalex.org/W2212494831","https://openalex.org/W2464922996","https://openalex.org/W2559655401","https://openalex.org/W2612448678","https://openalex.org/W2741156154","https://openalex.org/W2795307598","https://openalex.org/W2799002257","https://openalex.org/W2895299763","https://openalex.org/W2915250631","https://openalex.org/W2963524571","https://openalex.org/W2980325478","https://openalex.org/W3005758474","https://openalex.org/W3014274848","https://openalex.org/W3034667697","https://openalex.org/W3034964972","https://openalex.org/W3086523768","https://openalex.org/W3089286977","https://openalex.org/W3100100339","https://openalex.org/W3118648637","https://openalex.org/W3119906317","https://openalex.org/W3138572511","https://openalex.org/W3191734489","https://openalex.org/W4302436740","https://openalex.org/W4394780790"],"related_works":["https://openalex.org/W2385108104","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747"],"abstract_inverted_index":{"Eye-tracking":[0],"applications":[1],"that":[2,78],"utilize":[3],"the":[4,18,44,85,106,120],"human":[5,33,50,63,82,93,133],"gaze":[6,34,51,64,94,124,134],"in":[7],"video":[8,21],"understanding":[9],"tasks":[10,141],"have":[11],"become":[12],"increasingly":[13],"important.":[14],"To":[15],"effectively":[16],"automate":[17],"process":[19],"of":[20,49,88,122],"analysis":[22],"based":[23],"on":[24,113],"eye-tracking":[25,99],"data,":[26],"it":[27],"is":[28,145],"important":[29],"to":[30,43,74,131],"accurately":[31],"replicate":[32,132],"behavior.":[35,65,95],"However,":[36],"this":[37,54],"task":[38],"presents":[39],"significant":[40],"challenges":[41],"due":[42],"inherent":[45],"complexity":[46],"and":[47,91,136],"ambiguity":[48],"patterns.":[52],"In":[53],"work,":[55],"we":[56],"introduce":[57],"a":[58,69,81,110],"novel":[59],"method":[60,126],"for":[61,139],"simulating":[62,92],"Our":[66,116],"approach":[67],"uses":[68],"transformer-based":[70],"reinforcement":[71],"learning":[72],"algorithm":[73],"train":[75],"an":[76,98],"agent":[77],"acts":[79],"as":[80,147],"observer,":[83],"with":[84,109],"primary":[86,111],"role":[87],"watching":[89],"videos":[90,103],"We":[96],"employed":[97],"dataset":[100],"gathered":[101],"from":[102],"generated":[104],"by":[105,127],"VirtualHome":[107],"simulator,":[108],"focus":[112],"activity":[114],"recognition.":[115],"experimental":[117],"results":[118],"demonstrate":[119],"effectiveness":[121],"our":[123],"prediction":[125],"highlighting":[128],"its":[129,137],"capability":[130],"behavior":[135],"applicability":[138],"downstream":[140],"where":[142],"real":[143],"human-gaze":[144],"used":[146],"input.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-30T09:04:40.226872","created_date":"2025-10-10T00:00:00"}
