{"id":"https://openalex.org/W4399199832","doi":"https://doi.org/10.1145/3656650.3656677","title":"VQAsk: a multimodal Android GPT-based application to help blind users visualize pictures","display_name":"VQAsk: a multimodal Android GPT-based application to help blind users visualize pictures","publication_year":2024,"publication_date":"2024-05-31","ids":{"openalex":"https://openalex.org/W4399199832","doi":"https://doi.org/10.1145/3656650.3656677"},"language":"en","primary_location":{"id":"doi:10.1145/3656650.3656677","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656650.3656677","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3656650.3656677","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Advanced Visual Interfaces","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3656650.3656677","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055512639","display_name":"Maria De Marsico","orcid":"https://orcid.org/0000-0002-1391-8502"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Maria De Marsico","raw_affiliation_strings":["Computer Science, Sapienza University of Rome, Italy"],"raw_orcid":"https://orcid.org/0000-0002-1391-8502","affiliations":[{"raw_affiliation_string":"Computer Science, Sapienza University of Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098955125","display_name":"Chiara Giacanelli","orcid":"https://orcid.org/0009-0006-6919-7771"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Chiara Giacanelli","raw_affiliation_strings":["Computer Science, Sapienza University of Rome, Italy"],"raw_orcid":"https://orcid.org/0009-0006-6919-7771","affiliations":[{"raw_affiliation_string":"Computer Science, Sapienza University of Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098955126","display_name":"Clizia Giorgia Manganaro","orcid":"https://orcid.org/0009-0009-0295-6958"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Clizia Giorgia Manganaro","raw_affiliation_strings":["Computer Science, Sapienza University of Rome, Italy"],"raw_orcid":"https://orcid.org/0009-0009-0295-6958","affiliations":[{"raw_affiliation_string":"Computer Science, Sapienza University of Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049135801","display_name":"A. Palma","orcid":"https://orcid.org/0009-0008-4332-9179"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Alessio Palma","raw_affiliation_strings":["Computer Science, Sapienza University of Rome, Italy"],"raw_orcid":"https://orcid.org/0009-0008-4332-9179","affiliations":[{"raw_affiliation_string":"Computer Science, Sapienza University of Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086762848","display_name":"Davide Santoro","orcid":"https://orcid.org/0009-0008-0837-8936"},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Davide Santoro","raw_affiliation_strings":["Computer Science, Sapienza University of Rome, Italy"],"raw_orcid":"https://orcid.org/0009-0008-0837-8936","affiliations":[{"raw_affiliation_string":"Computer Science, Sapienza University of Rome, Italy","institution_ids":["https://openalex.org/I861853513"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055512639"],"corresponding_institution_ids":["https://openalex.org/I861853513"],"apc_list":null,"apc_paid":null,"fwci":0.9523,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.75125732,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7932097911834717},{"id":"https://openalex.org/keywords/visually-impaired","display_name":"Visually impaired","score":0.718459963798523},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.657050371170044},{"id":"https://openalex.org/keywords/haptic-technology","display_name":"Haptic technology","score":0.6235295534133911},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6208468079566956},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.5844884514808655},{"id":"https://openalex.org/keywords/android","display_name":"Android (operating system)","score":0.5786427855491638},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.5639005303382874},{"id":"https://openalex.org/keywords/sight","display_name":"Sight","score":0.5124924182891846},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4975281059741974},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.4661814868450165},{"id":"https://openalex.org/keywords/visual-impairment","display_name":"Visual impairment","score":0.45878416299819946},{"id":"https://openalex.org/keywords/screen-reader","display_name":"Screen reader","score":0.44821423292160034},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3764346241950989},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37046268582344055},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.17864447832107544}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7932097911834717},{"id":"https://openalex.org/C3020106864","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Visually impaired","level":2,"score":0.718459963798523},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.657050371170044},{"id":"https://openalex.org/C152086174","wikidata":"https://www.wikidata.org/wiki/Q3030571","display_name":"Haptic technology","level":2,"score":0.6235295534133911},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6208468079566956},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.5844884514808655},{"id":"https://openalex.org/C557433098","wikidata":"https://www.wikidata.org/wiki/Q94","display_name":"Android (operating system)","level":2,"score":0.5786427855491638},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.5639005303382874},{"id":"https://openalex.org/C1517167","wikidata":"https://www.wikidata.org/wiki/Q1134322","display_name":"Sight","level":2,"score":0.5124924182891846},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4975281059741974},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.4661814868450165},{"id":"https://openalex.org/C2781372952","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Visual impairment","level":2,"score":0.45878416299819946},{"id":"https://openalex.org/C2778535757","wikidata":"https://www.wikidata.org/wiki/Q1328864","display_name":"Screen reader","level":3,"score":0.44821423292160034},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3764346241950989},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37046268582344055},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.17864447832107544},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3656650.3656677","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656650.3656677","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3656650.3656677","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Advanced Visual Interfaces","raw_type":"proceedings-article"},{"id":"pmh:oai:iris.uniroma1.it:11573/1726579","is_oa":true,"landing_page_url":"https://hdl.handle.net/11573/1726579","pdf_url":"https://iris.uniroma1.it/bitstream/11573/1726579/1/DeMarsico_VQAsk-multimodal-Android_2024.pdf","source":{"id":"https://openalex.org/S4377196107","display_name":"IRIS Research product catalog (Sapienza University of Rome)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.1145/3656650.3656677","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656650.3656677","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3656650.3656677","source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Advanced Visual Interfaces","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399199832.pdf","grobid_xml":"https://content.openalex.org/works/W4399199832.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W2090048052","https://openalex.org/W2116680608","https://openalex.org/W2745461083","https://openalex.org/W2950239670","https://openalex.org/W2963622213","https://openalex.org/W3015965768","https://openalex.org/W3159131359","https://openalex.org/W3206946574"],"related_works":["https://openalex.org/W2166248296","https://openalex.org/W2040431336","https://openalex.org/W2995743050","https://openalex.org/W3022313497","https://openalex.org/W332756932","https://openalex.org/W2976816735","https://openalex.org/W2013013717","https://openalex.org/W66256988","https://openalex.org/W2068964259","https://openalex.org/W4302857920"],"abstract_inverted_index":{"VQAsk":[0,49],"is":[1,50,101],"an":[2],"Android":[3],"application":[4],"that":[5,42,110],"helps":[6],"visually":[7,55,137],"impaired":[8,56,138],"users":[9,57,69],"to":[10,21,53,87,135],"get":[11],"information":[12],"about":[13,92],"images":[14],"framed":[15],"by":[16,103],"their":[17],"smartphones.":[18],"It":[19],"enables":[20],"interact":[22],"with":[23,70],"one\u2019s":[24],"photographs":[25],"or":[26],"the":[27,93,96,122],"surrounding":[28],"visual":[29,74],"environment":[30],"through":[31,106],"a":[32,118,128],"question-and-answer":[33],"interface":[34],"integrating":[35],"three":[36],"modalities:":[37],"speech":[38],"interaction,":[39,46],"haptic":[40],"feedback":[41],"facilitates":[43],"navigation":[44],"and":[45,47,83],"sight.":[48],"primarily":[51],"designed":[52],"help":[54],"mentally":[58],"visualize":[59],"what":[60],"they":[61],"cannot":[62],"see,":[63],"but":[64],"it":[65,79],"can":[66],"also":[67],"accommodate":[68],"varying":[71],"levels":[72],"of":[73,117,124],"ability.":[75],"To":[76],"this":[77,125],"aim,":[78],"embeds":[80],"advanced":[81,107],"NLP":[82],"Computer":[84],"Vision":[85],"techniques":[86],"answer":[88],"all":[89],"user":[90],"questions":[91],"image":[94,113],"on":[95],"cell":[97],"screen.":[98],"Image":[99],"processing":[100],"enhanced":[102],"background":[104],"removal":[105],"segmentation":[108],"models":[109],"identify":[111],"important":[112],"elements.":[114],"The":[115],"outcomes":[116],"testing":[119],"phase":[120],"confirmed":[121],"importance":[123],"project":[126],"as":[127],"first":[129],"attempt":[130],"at":[131],"using":[132],"AI-supported":[133],"multimodality":[134],"enhance":[136],"users\u2019":[139],"experience.":[140]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
