{"id":"https://openalex.org/W4405785760","doi":"https://doi.org/10.1109/iros58592.2024.10801813","title":"A Large Vision-Language Model based Environment Perception System for Visually Impaired People","display_name":"A Large Vision-Language Model based Environment Perception System for Visually Impaired People","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405785760","doi":"https://doi.org/10.1109/iros58592.2024.10801813"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10801813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2504.18027","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089074483","display_name":"Zezhou Chen","orcid":"https://orcid.org/0000-0001-6533-8342"},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zezhou Chen","raw_affiliation_strings":["China Unicom,AI Innovation Center,Beijing,China,100013"],"affiliations":[{"raw_affiliation_string":"China Unicom,AI Innovation Center,Beijing,China,100013","institution_ids":["https://openalex.org/I6507939"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101742159","display_name":"Zhaoxiang Liu","orcid":"https://orcid.org/0000-0002-1267-0277"},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoxiang Liu","raw_affiliation_strings":["China Unicom,AI Innovation Center,Beijing,China,100013"],"affiliations":[{"raw_affiliation_string":"China Unicom,AI Innovation Center,Beijing,China,100013","institution_ids":["https://openalex.org/I6507939"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100437036","display_name":"Kai Wang","orcid":"https://orcid.org/0000-0002-6170-4744"},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Wang","raw_affiliation_strings":["China Unicom,AI Innovation Center,Beijing,China,100013"],"affiliations":[{"raw_affiliation_string":"China Unicom,AI Innovation Center,Beijing,China,100013","institution_ids":["https://openalex.org/I6507939"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021752084","display_name":"Kohou Wang","orcid":"https://orcid.org/0009-0007-5863-2288"},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kohou Wang","raw_affiliation_strings":["China Unicom,AI Innovation Center,Beijing,China,100013"],"affiliations":[{"raw_affiliation_string":"China Unicom,AI Innovation Center,Beijing,China,100013","institution_ids":["https://openalex.org/I6507939"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066958531","display_name":"Shiguo Lian","orcid":"https://orcid.org/0000-0003-4308-7049"},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiguo Lian","raw_affiliation_strings":["China Unicom,AI Innovation Center,Beijing,China,100013"],"affiliations":[{"raw_affiliation_string":"China Unicom,AI Innovation Center,Beijing,China,100013","institution_ids":["https://openalex.org/I6507939"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5089074483"],"corresponding_institution_ids":["https://openalex.org/I6507939"],"apc_list":null,"apc_paid":null,"fwci":0.7291,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73579955,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"221","last_page":"228"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9553999900817871,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9553999900817871,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visually-impaired","display_name":"Visually impaired","score":0.7782827019691467},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.7229976654052734},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6353766918182373},{"id":"https://openalex.org/keywords/impaired-vision","display_name":"Impaired Vision","score":0.5645240545272827},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5502585768699646},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.505810022354126},{"id":"https://openalex.org/keywords/machine-vision","display_name":"Machine vision","score":0.4922398626804352},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3970780372619629},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2320738136768341},{"id":"https://openalex.org/keywords/optometry","display_name":"Optometry","score":0.18485528230667114},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.07571297883987427}],"concepts":[{"id":"https://openalex.org/C3020106864","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Visually impaired","level":2,"score":0.7782827019691467},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.7229976654052734},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6353766918182373},{"id":"https://openalex.org/C2909902731","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Impaired Vision","level":2,"score":0.5645240545272827},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5502585768699646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.505810022354126},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.4922398626804352},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3970780372619629},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2320738136768341},{"id":"https://openalex.org/C119767625","wikidata":"https://www.wikidata.org/wiki/Q618211","display_name":"Optometry","level":1,"score":0.18485528230667114},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.07571297883987427},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iros58592.2024.10801813","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801813","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2504.18027","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.18027","pdf_url":"https://arxiv.org/pdf/2504.18027","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2504.18027","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.18027","pdf_url":"https://arxiv.org/pdf/2504.18027","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","score":0.6000000238418579,"display_name":"Climate action"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1895577753","https://openalex.org/W1903029394","https://openalex.org/W1994893618","https://openalex.org/W2102605133","https://openalex.org/W2136691781","https://openalex.org/W2193145675","https://openalex.org/W2399945122","https://openalex.org/W2403940793","https://openalex.org/W2412782625","https://openalex.org/W2545908109","https://openalex.org/W2552002300","https://openalex.org/W2561304097","https://openalex.org/W2575842049","https://openalex.org/W2587989515","https://openalex.org/W2611312712","https://openalex.org/W2611577320","https://openalex.org/W2611756113","https://openalex.org/W2736525866","https://openalex.org/W2741346289","https://openalex.org/W2749195516","https://openalex.org/W2784327374","https://openalex.org/W2786144434","https://openalex.org/W2791813490","https://openalex.org/W2792585818","https://openalex.org/W2795402370","https://openalex.org/W2795671290","https://openalex.org/W2796356930","https://openalex.org/W2796373441","https://openalex.org/W2963037989","https://openalex.org/W2963658615","https://openalex.org/W2970782003","https://openalex.org/W3025381604","https://openalex.org/W3138516171","https://openalex.org/W3170841864","https://openalex.org/W4312426743","https://openalex.org/W4384126598","https://openalex.org/W4385245566","https://openalex.org/W4389523832","https://openalex.org/W4393160204","https://openalex.org/W4405596328","https://openalex.org/W6752606552","https://openalex.org/W6784333009","https://openalex.org/W6789909235","https://openalex.org/W6797716411","https://openalex.org/W6800139874","https://openalex.org/W6810334672","https://openalex.org/W6849177959","https://openalex.org/W6850359928","https://openalex.org/W6851592950","https://openalex.org/W6854263694","https://openalex.org/W6854347851","https://openalex.org/W6855350031"],"related_works":["https://openalex.org/W2616129665","https://openalex.org/W2009573200","https://openalex.org/W4246305286","https://openalex.org/W3183921029","https://openalex.org/W4396689001","https://openalex.org/W3196015902","https://openalex.org/W3177313669","https://openalex.org/W4323567756","https://openalex.org/W2055772953","https://openalex.org/W4385711359"],"abstract_inverted_index":{"It":[0],"is":[1],"a":[2,34,59,79,106,117,178],"challenging":[3],"task":[4],"for":[5],"visually":[6,74,133,195],"impaired":[7,75,134,196],"people":[8,76,135,197],"to":[9,15,45,90,160,186,198],"perceive":[10,138,199],"their":[11],"surrounding":[12,49,201],"environment":[13,39,202],"due":[14],"the":[16,19,48,53,67,71,83,88,92,96,99,102,113,121,129,139,145,149,156,162,174,183,192,200],"complexity":[17],"of":[18,82,98,120,148,158,182],"natural":[20],"scenes.":[21],"Their":[22],"personal":[23],"and":[24,62,115,170],"social":[25],"activities":[26],"are":[27,124],"thus":[28],"highly":[29],"limited.":[30],"This":[31],"paper":[32,142],"introduces":[33],"Large":[35],"Vision-Language":[36],"Model(LVLM)":[37],"based":[38],"perception":[40],"system":[41,175,193],"which":[42],"helps":[43,194],"them":[44,65],"better":[46],"understand":[47],"environment,":[50],"by":[51,85,109,127],"capturing":[52],"current":[54],"scene":[55,84,103,184],"they":[56,123],"face":[57],"with":[58],"wearable":[60],"device,":[61],"then":[63],"letting":[64],"retrieve":[66,95],"analysis":[68],"results":[69],"through":[70],"device.":[72],"The":[73],"could":[77,176],"acquire":[78],"global":[80],"description":[81,119,181],"long":[86],"pressing":[87],"screen":[89],"activate":[91],"LVLM":[93,159],"output,":[94],"categories":[97],"objects":[100,122],"in":[101,126],"resulting":[104],"from":[105],"segmentation":[107,146],"model":[108],"tapping":[110],"or":[111],"swiping":[112],"screen,":[114],"get":[116],"detailed":[118],"interested":[125],"double-tapping":[128],"screen.":[130],"To":[131],"help":[132],"more":[136,179],"accurately":[137],"world,":[140],"this":[141],"proposes":[143],"incorporating":[144],"result":[147],"RGB":[150],"image":[151],"as":[152],"external":[153],"knowledge":[154],"into":[155],"input":[157],"reduce":[161],"LVLM\u2019s":[163],"hallucination.":[164],"Technical":[165],"experiments":[166,189],"on":[167],"POPE,":[168],"MME":[169],"LLaVA-QA90":[171],"show":[172,190],"that":[173,191],"provide":[177],"accurate":[180],"compared":[185],"Qwen-VL-Chat,":[187],"exploratory":[188],"effectively.":[203]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
