{"id":"https://openalex.org/W7127909493","doi":"https://doi.org/10.48550/arxiv.2602.05671","title":"(Computer) Vision in Action: Comparing Remote Sighted Assistance and a Multimodal Voice Agent in Inspection Sequences","display_name":"(Computer) Vision in Action: Comparing Remote Sighted Assistance and a Multimodal Voice Agent in Inspection Sequences","publication_year":2026,"publication_date":"2026-02-05","ids":{"openalex":"https://openalex.org/W7127909493","doi":"https://doi.org/10.48550/arxiv.2602.05671"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.05671","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125242080","display_name":"Damien Rudaz","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rudaz, Damien","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048793952","display_name":"Barbara Nino Carreras","orcid":"https://orcid.org/0000-0002-2742-4714"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Carreras, Barbara Nino","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5096881466","display_name":"Sara Merlino","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Merlino, Sara","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086980737","display_name":"Brian Lystgaard Due","orcid":"https://orcid.org/0000-0003-3670-9102"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Due, Brian L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5005523245","display_name":"Barry Brown","orcid":"https://orcid.org/0000-0002-9710-6607"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brown, Barry","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5125242080"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.47130000591278076,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.47130000591278076,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.17839999496936798,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.04619999974966049,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5024999976158142},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.4749999940395355},{"id":"https://openalex.org/keywords/voice-command-device","display_name":"Voice command device","score":0.46380001306533813},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.43070000410079956},{"id":"https://openalex.org/keywords/visually-impaired","display_name":"Visually impaired","score":0.42730000615119934},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.392300009727478}],"concepts":[{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5256999731063843},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5024999976158142},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.499099999666214},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.4749999940395355},{"id":"https://openalex.org/C178718744","wikidata":"https://www.wikidata.org/wiki/Q2350070","display_name":"Voice command device","level":2,"score":0.46380001306533813},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.43070000410079956},{"id":"https://openalex.org/C3020106864","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Visually impaired","level":2,"score":0.42730000615119934},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.392300009727478},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.3490999937057495},{"id":"https://openalex.org/C194943564","wikidata":"https://www.wikidata.org/wiki/Q1129049","display_name":"Participant observation","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2743000090122223},{"id":"https://openalex.org/C182964821","wikidata":"https://www.wikidata.org/wiki/Q7939498","display_name":"Voice analysis","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2567000091075897},{"id":"https://openalex.org/C3018906133","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Partially sighted","level":3,"score":0.2533000111579895},{"id":"https://openalex.org/C2987082051","wikidata":"https://www.wikidata.org/wiki/Q223642","display_name":"Human interaction","level":2,"score":0.2513999938964844}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.05671","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.05671","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.05671","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.05671","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Does":[0],"human-AI":[1],"assistance":[2],"unfold":[3],"in":[4,118],"the":[5,19,29,37,56,80,85,95,113],"same":[6,86,96],"way":[7],"as":[8,125],"human-human":[9],"assistance?":[10],"This":[11,102],"research":[12],"explores":[13],"what":[14],"can":[15],"be":[16],"learned":[17],"from":[18,50],"expertise":[20],"of":[21,31,40,46],"blind":[22,68],"individuals":[23],"and":[24,35,66],"sighted":[25,64,146],"volunteers":[26],"to":[27,72,106],"inform":[28],"design":[30],"multimodal":[32,91,126],"voice":[33,92,127],"agents":[34,128],"address":[36],"enduring":[38],"challenge":[39],"proactivity.":[41],"Drawing":[42],"on":[43,76,94],"granular":[44],"analysis":[45],"two":[47],"representative":[48],"fragments":[49],"a":[51,67,74,77,90,98,138],"larger":[52],"corpus,":[53],"we":[54],"contrast":[55],"practices":[57,112],"co-produced":[58],"by":[59,143],"an":[60],"experienced":[61],"human":[62,144],"remote":[63,145],"assistant":[65],"participant-as":[69],"they":[70,135],"collaborate":[71],"find":[73],"stain":[75],"blanket":[78],"over":[79],"phone-with":[81],"those":[82],"achieved":[83],"when":[84],"participant":[87],"worked":[88],"with":[89],"agent":[93,114],"task,":[97],"few":[99],"moments":[100],"earlier.":[101],"comparison":[103],"enables":[104],"us":[105],"specify":[107],"precisely":[108],"which":[109],"fundamental":[110],"proactive":[111],"did":[115],"not":[116],"enact":[117],"situ.":[119],"We":[120],"conclude":[121],"that,":[122],"so":[123],"long":[124],"cannot":[129],"produce":[130],"environmentally":[131],"occasioned":[132],"vision-based":[133],"actions,":[134],"will":[136],"lack":[137],"key":[139],"resource":[140],"relied":[141],"upon":[142],"assistants.":[147]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-07T00:00:00"}
