{"id":"https://openalex.org/W4415366876","doi":"https://doi.org/10.1109/mic.2025.3622505","title":"Probing the Augmented Reality Scene Analysis Capabilities of Large Multimodal Models: Toward Reliable Real-Time Assessment Solutions","display_name":"Probing the Augmented Reality Scene Analysis Capabilities of Large Multimodal Models: Toward Reliable Real-Time Assessment Solutions","publication_year":2025,"publication_date":"2025-10-20","ids":{"openalex":"https://openalex.org/W4415366876","doi":"https://doi.org/10.1109/mic.2025.3622505"},"language":null,"primary_location":{"id":"doi:10.1109/mic.2025.3622505","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mic.2025.3622505","pdf_url":null,"source":{"id":"https://openalex.org/S205899252","display_name":"IEEE Internet Computing","issn_l":"1089-7801","issn":["1089-7801","1941-0131"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016695945","display_name":"Lin Duan","orcid":"https://orcid.org/0000-0001-8515-6351"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lin Duan","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0000-0001-8515-6351","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120072201","display_name":"Elias Rotondo","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elias Rotondo","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0000-0002-9674-6135","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114957514","display_name":"Yanming Xiu","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanming Xiu","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0009-0008-1547-3261","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088017885","display_name":"Sangjun Eom","orcid":"https://orcid.org/0000-0003-2075-6932"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sangjun Eom","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0000-0003-2075-6932","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ryan Chen","orcid":"https://orcid.org/0009-0004-4803-0839"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Chen","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0009-0004-4803-0839","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034115979","display_name":"Conrad Li","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Conrad Li","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0009-0004-6436-8258","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuhe Hu","orcid":"https://orcid.org/0009-0004-0469-7164"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuhe Hu","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0009-0004-0469-7164","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036726336","display_name":"Maria Gorlatova","orcid":"https://orcid.org/0000-0002-5477-7830"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maria Gorlatova","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":"https://orcid.org/0000-0002-5477-7830","affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9349,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.80057064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"29","issue":"6","first_page":"25","last_page":"34"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10888","display_name":"Augmented Reality Applications","score":0.6985999941825867,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10888","display_name":"Augmented Reality Applications","score":0.6985999941825867,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.6019999980926514,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/augmented-reality","display_name":"Augmented reality","score":0.8687999844551086},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7060999870300293},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5464000105857849},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4690999984741211},{"id":"https://openalex.org/keywords/quality-of-experience","display_name":"Quality of experience","score":0.39959999918937683},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.36090001463890076},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.33730000257492065},{"id":"https://openalex.org/keywords/user-experience-design","display_name":"User experience design","score":0.3131999969482422}],"concepts":[{"id":"https://openalex.org/C153715457","wikidata":"https://www.wikidata.org/wiki/Q254183","display_name":"Augmented reality","level":2,"score":0.8687999844551086},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8260999917984009},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7060999870300293},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5464000105857849},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4867999851703644},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4690999984741211},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45910000801086426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42309999465942383},{"id":"https://openalex.org/C2779333187","wikidata":"https://www.wikidata.org/wiki/Q3132648","display_name":"Quality of experience","level":3,"score":0.39959999918937683},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.33730000257492065},{"id":"https://openalex.org/C201025465","wikidata":"https://www.wikidata.org/wiki/Q11248500","display_name":"User experience design","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3005000054836273},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C207267971","wikidata":"https://www.wikidata.org/wiki/Q120208","display_name":"Emerging technologies","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C21569690","wikidata":"https://www.wikidata.org/wiki/Q94702","display_name":"Collaborative filtering","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.2524000108242035},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mic.2025.3622505","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mic.2025.3622505","pdf_url":null,"source":{"id":"https://openalex.org/S205899252","display_name":"IEEE Internet Computing","issn_l":"1089-7801","issn":["1089-7801","1941-0131"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3476036093","display_name":null,"funder_award_id":"IIS-2231975","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7709164144","display_name":null,"funder_award_id":"CNS-2112562","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8603034947","display_name":null,"funder_award_id":"CSR-2312760","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Augmented":[0],"Reality":[1],"(AR)":[2],"is":[3,26],"transforming":[4],"everyday":[5],"experiences":[6],"across":[7],"domains":[8],"like":[9],"education,":[10],"entertainment,":[11],"and":[12,21,61,63,79,90,111,132],"healthcare.":[13],"As":[14],"AR":[15,23,45,81,92,126],"technologies":[16],"become":[17],"increasingly":[18],"widespread,":[19],"human-aligned":[20],"scalable":[22,110],"quality":[24,46,93],"evaluation":[25,113,128],"critical":[27],"for":[28,43],"optimizing":[29],"immersive":[30],"user":[31],"experiences.":[32],"This":[33],"paper":[34],"investigates":[35],"the":[36],"potential":[37],"of":[38,54],"Large":[39],"Multimodal":[40],"Models":[41],"(LMMs)":[42],"automating":[44],"assessment.":[47],"We":[48,118],"curate":[49],"DiverseAR+,":[50],"a":[51,97],"new":[52],"dataset":[53],"1,405":[55],"scenes":[56],"collected":[57],"from":[58],"diverse":[59,115],"sources":[60],"environments,":[62],"use":[64],"it":[65],"to":[66],"evaluate":[67],"four":[68],"commercial":[69],"LMMs.":[70],"Our":[71],"results":[72],"show":[73],"that":[74,101,120],"LMMs":[75,103],"can":[76,124],"perceive,":[77],"describe,":[78],"judge":[80],"content":[82],"with":[83,104],"promising":[84],"accuracy.":[85],"To":[86],"deliver":[87],"real-time,":[88],"scalable,":[89],"robust":[91],"evaluation,":[94],"we":[95],"propose":[96],"hybrid":[98],"cloud-edge":[99],"architecture":[100],"combines":[102],"traditional":[105],"machine":[106],"learning":[107],"models,":[108],"enabling":[109],"real-time":[112],"under":[114],"network":[116],"conditions.":[117],"argue":[119],"task-tailored":[121],"AR-LMM":[122],"systems":[123],"make":[125],"experience":[127],"more":[129],"efficient,":[130],"adaptive,":[131],"user-centered.":[133]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-21T00:00:00"}
