{"id":"https://openalex.org/W7131893792","doi":"https://doi.org/10.1109/sii64115.2026.11404653","title":"Texting-While-Walking Detection in Real-World Environments Using Vision-Language Models with Prompt Engineering","display_name":"Texting-While-Walking Detection in Real-World Environments Using Vision-Language Models with Prompt Engineering","publication_year":2026,"publication_date":"2026-01-11","ids":{"openalex":"https://openalex.org/W7131893792","doi":"https://doi.org/10.1109/sii64115.2026.11404653"},"language":null,"primary_location":{"id":"doi:10.1109/sii64115.2026.11404653","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sii64115.2026.11404653","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/SICE International Symposium on System Integration (SII)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127194944","display_name":"Seungpyo Choi","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Seungpyo Choi","raw_affiliation_strings":["The University of Tokyo,Graduate School of Frontier Sciences,Department of Human and Engineered Environmental Studies,Chiba,Japan,277-8563"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Graduate School of Frontier Sciences,Department of Human and Engineered Environmental Studies,Chiba,Japan,277-8563","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127282528","display_name":"Jiaxu Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I159385669","display_name":"Chiba University","ror":"https://ror.org/01hjzeq58","country_code":"JP","type":"education","lineage":["https://openalex.org/I159385669"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jiaxu Wu","raw_affiliation_strings":["The University of Tokyo,Institute of Engineering Innovation, Graduate School of Engineering,Chiba,Japan,277-8563"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Institute of Engineering Innovation, Graduate School of Engineering,Chiba,Japan,277-8563","institution_ids":["https://openalex.org/I159385669"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100774642","display_name":"Qi An","orcid":"https://orcid.org/0000-0002-6444-7157"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Qi An","raw_affiliation_strings":["The University of Tokyo,Graduate School of Frontier Sciences,Department of Human and Engineered Environmental Studies,Chiba,Japan,277-8563"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Graduate School of Frontier Sciences,Department of Human and Engineered Environmental Studies,Chiba,Japan,277-8563","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5127137179","display_name":"Atsushi Yamashita","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Atsushi Yamashita","raw_affiliation_strings":["The University of Tokyo,Graduate School of Frontier Sciences,Department of Human and Engineered Environmental Studies,Chiba,Japan,277-8563"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,Graduate School of Frontier Sciences,Department of Human and Engineered Environmental Studies,Chiba,Japan,277-8563","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5127194944"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48899431,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"612","last_page":"617"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5077999830245972,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5077999830245972,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.03999999910593033,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.03480000048875809,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7213000059127808},{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.6563000082969666},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5404000282287598},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5013999938964844},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.49779999256134033},{"id":"https://openalex.org/keywords/pedestrian-detection","display_name":"Pedestrian detection","score":0.45669999718666077},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.3596999943256378}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7213000059127808},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7091000080108643},{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.6563000082969666},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6292999982833862},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5404000282287598},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5013999938964844},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.49779999256134033},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4697999954223633},{"id":"https://openalex.org/C2780156472","wikidata":"https://www.wikidata.org/wiki/Q2355550","display_name":"Pedestrian detection","level":3,"score":0.45669999718666077},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4339999854564667},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3596999943256378},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.34540000557899475},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3246000111103058},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29350000619888306},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.2773999869823456},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.27390000224113464},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2563999891281128}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sii64115.2026.11404653","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sii64115.2026.11404653","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/SICE International Symposium on System Integration (SII)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4329793155193329,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322832","display_name":"University of Tokyo","ror":"https://ror.org/057zh3y96"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2105431345","https://openalex.org/W2771583656","https://openalex.org/W2905513778","https://openalex.org/W2910164606","https://openalex.org/W2963287796","https://openalex.org/W2991484432","https://openalex.org/W3003348829","https://openalex.org/W3138277108","https://openalex.org/W3198377975","https://openalex.org/W4210536149","https://openalex.org/W4402727764","https://openalex.org/W4411231957","https://openalex.org/W4415798584"],"related_works":[],"abstract_inverted_index":{"Smartphone-induced":[0],"\"texting":[1],"while":[2,145],"walking\"":[3],"poses":[4,64],"growing":[5],"safety":[6],"risks":[7],"not":[8],"only":[9],"in":[10,16,120,151,203],"public":[11],"shared":[12],"spaces":[13],"but":[14],"also":[15],"robot":[17],"navigation":[18,227],"scenarios":[19],"where":[20],"humans":[21],"and":[22,57,88,97,103,108,172,176,209,226],"robots":[23],"coexist.":[24],"To":[25,68],"mitigate":[26],"these":[27,49],"risks,":[28],"recent":[29],"studies":[30],"have":[31],"developed":[32],"pedestrian":[33,154],"behavior":[34,78],"detection":[35,59],"models":[36,50],"that":[37,80,130,164,189],"aim":[38],"to":[39,140,180,219],"recognize":[40],"when":[41,61],"people":[42],"are":[43],"distracted":[44],"by":[45],"their":[46],"smartphones.":[47],"However,":[48],"still":[51],"suffer":[52],"from":[53,157],"high":[54],"false-positive":[55,170],"rates":[56],"reduced":[58],"accuracy":[60],"visually":[62],"similar":[63],"or":[65],"occlusions":[66],"occur.":[67],"address":[69],"this":[70],"issue,":[71],"we":[72,94],"propose":[73],"a":[74,121,126,181,213,220],"Vision-Language":[75],"Model":[76],"(VLM)-based":[77],"detector":[79],"exploits":[81],"VLMs":[82],"pretrained":[83],"on":[84],"large":[85],"image-text":[86],"datasets":[87],"capable":[89],"of":[90,223],"global-context":[91],"inference.":[92],"Specifically,":[93],"leverage":[95],"LLaVA-7B":[96],"systematically":[98],"evaluate":[99],"three":[100],"prompt-engineering":[101],"schemes\u2014chain-of-thought":[102],"self-consistency":[104],"under":[105,111],"zero-shot":[106],"settings,":[107],"few-shot":[109,112],"prompting":[110],"settings.":[113],"We":[114],"conducted":[115],"the":[116,133,158],"dataset":[117,208],"generation":[118],"experiment":[119],"typical":[122],"indoor":[123],"hall":[124],"with":[125,194],"centrally":[127],"placed":[128],"table":[129],"intermittently":[131],"occluded":[132],"robot\u2019s":[134,159],"view.":[135],"During":[136],"each":[137],"session,":[138],"four":[139],"six":[141],"participants":[142],"walked":[143],"freely":[144],"performing":[146],"nine":[147],"everyday":[148],"actions,":[149],"resulting":[150],"11,815":[152],"annotated":[153],"images":[155],"captured":[156],"perspective.":[160],"Experimental":[161],"results":[162,211],"show":[163],"our":[165],"VLM-based":[166,217],"pipeline":[167],"significantly":[168],"reduces":[169],"detections":[171],"improves":[173],"both":[174],"precision":[175],"overall":[177],"F1-score":[178],"compared":[179],"conventional":[182],"pose-based":[183],"LSTM":[184],"baseline.":[185],"These":[186],"gains":[187],"demonstrate":[188],"combining":[190],"large-scale":[191],"VLM":[192],"reasoning":[193],"specially":[195],"designed":[196],"prompts":[197],"can":[198],"overcome":[199],"long-standing":[200],"misclassification":[201],"issues":[202],"existing":[204],"approaches.":[205],"Our":[206],"curated":[207],"prompt-analysis":[210],"provide":[212],"foundation":[214],"for":[215],"extending":[216],"perception":[218],"wide":[221],"range":[222],"camera-based":[224],"monitoring":[225],"systems.":[228]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-02-28T00:00:00"}
