{"id":"https://openalex.org/W7135101817","doi":"https://doi.org/10.1145/3776734.3794517","title":"Real-Time Vision for Socially Aware Robots: Gesture, Pointing, and Visual Engagement Estimation","display_name":"Real-Time Vision for Socially Aware Robots: Gesture, Pointing, and Visual Engagement Estimation","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135101817","doi":"https://doi.org/10.1145/3776734.3794517"},"language":null,"primary_location":{"id":"doi:10.1145/3776734.3794517","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3776734.3794517","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 21st ACM/IEEE International Conference on Human-Robot Interaction","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011879566","display_name":"Bartomeu Pou","orcid":"https://orcid.org/0000-0001-8634-2316"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bartomeu Pou","raw_affiliation_strings":["IIIA-CSIC, Bellaterra, Spain"],"raw_orcid":"https://orcid.org/0000-0001-8634-2316","affiliations":[{"raw_affiliation_string":"IIIA-CSIC, Bellaterra, Spain","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5128918956","display_name":"Raquel Ros","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Raquel Ros","raw_affiliation_strings":["IIIA-CSIC, Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0002-8295-6932","affiliations":[{"raw_affiliation_string":"IIIA-CSIC, Barcelona, Spain","institution_ids":["https://openalex.org/I4210131846"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5011879566"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.50336155,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"860","last_page":"864"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.35120001435279846,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.35120001435279846,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.10639999806880951,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.09000000357627869,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.451200008392334},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.3100000023841858},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.30889999866485596},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.29600000381469727},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.29269999265670776}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5368000268936157},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.451200008392334},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4171999990940094},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3928000032901764},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34310001134872437},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3091999888420105},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.259799987077713}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3776734.3794517","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3776734.3794517","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the 21st ACM/IEEE International Conference on Human-Robot Interaction","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5299075841903687}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W587016256","https://openalex.org/W2062614785","https://openalex.org/W2079718495","https://openalex.org/W2240541372","https://openalex.org/W2399452755","https://openalex.org/W4206359847","https://openalex.org/W4206937573","https://openalex.org/W4390873964","https://openalex.org/W4392653080"],"related_works":[],"abstract_inverted_index":{"Socially":[0],"aware":[1],"robots":[2],"must":[3],"interpret":[4],"non-verbal":[5],"cues":[6],"such":[7],"as":[8],"gaze,":[9],"gestures":[10],"and":[11,28,37,40,49,60],"pointing":[12,33],"under":[13],"strict":[14],"computational":[15],"constraints.":[16],"We":[17],"present":[18],"a":[19,41],"lightweight":[20],"vision":[21],"framework":[22],"that":[23],"extends":[24],"ROS4HRI":[25],"with":[26],"hand":[27],"head":[29],"gesture":[30],"recognition,":[31],"hybrid":[32],"estimation":[34],"for":[35],"short":[36],"long":[38],"distances,":[39],"multi-target":[42],"visual":[43],"engagement":[44],"metric":[45],"over":[46],"both":[47],"agents":[48],"objects.":[50],"All":[51],"components":[52],"run":[53],"in":[54],"real":[55],"time":[56],"on":[57],"embedded":[58],"hardware":[59],"are":[61],"validated":[62],"through":[63],"proof-of-concept":[64],"experiments.":[65]},"counts_by_year":[],"updated_date":"2026-03-14T06:41:57.775601","created_date":"2026-03-13T00:00:00"}
