{"id":"https://openalex.org/W4409767710","doi":"https://doi.org/10.1145/3706598.3714096","title":"VideoA11y: Method and Dataset for Accessible Video Description","display_name":"VideoA11y: Method and Dataset for Accessible Video Description","publication_year":2025,"publication_date":"2025-04-24","ids":{"openalex":"https://openalex.org/W4409767710","doi":"https://doi.org/10.1145/3706598.3714096","pmid":"https://pubmed.ncbi.nlm.nih.gov/40894856"},"language":"en","primary_location":{"id":"doi:10.1145/3706598.3714096","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3706598.3714096","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12398407/pdf/nihms-2082836.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101689952","display_name":"Chaoyu Li","orcid":"https://orcid.org/0000-0003-0788-0189"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chaoyu Li","raw_affiliation_strings":["School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA"],"raw_orcid":"https://orcid.org/0000-0003-0788-0189","affiliations":[{"raw_affiliation_string":"School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004287508","display_name":"Sid Padmanabhuni","orcid":"https://orcid.org/0000-0002-6568-8139"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sid Padmanabhuni","raw_affiliation_strings":["School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA"],"raw_orcid":"https://orcid.org/0000-0002-6568-8139","affiliations":[{"raw_affiliation_string":"School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117294161","display_name":"Maryam S Cheema","orcid":null},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maryam S Cheema","raw_affiliation_strings":["School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA"],"raw_orcid":"https://orcid.org/0009-0009-0556-8029","affiliations":[{"raw_affiliation_string":"School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043917822","display_name":"Hasti Seifi","orcid":"https://orcid.org/0000-0001-6437-0463"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hasti Seifi","raw_affiliation_strings":["School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA"],"raw_orcid":"https://orcid.org/0000-0001-6437-0463","affiliations":[{"raw_affiliation_string":"School of Computing and Augmented Intelligence, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049316035","display_name":"Pooyan Fazli","orcid":"https://orcid.org/0000-0002-2625-8216"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pooyan Fazli","raw_affiliation_strings":["School of Arts, Media and Engineering, Arizona State University, Tempe, Arizona, USA"],"raw_orcid":"https://orcid.org/0000-0002-2625-8216","affiliations":[{"raw_affiliation_string":"School of Arts, Media and Engineering, Arizona State University, Tempe, Arizona, USA","institution_ids":["https://openalex.org/I55732556"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.6747,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.94758562,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"2025","issue":null,"first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8057653903961182},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3784070611000061},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.36046552658081055},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3424682319164276}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8057653903961182},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3784070611000061},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36046552658081055},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3424682319164276}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3706598.3714096","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3706598.3714096","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},{"id":"pmid:40894856","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40894856","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGCHI conference on human factors in computing systems. CHI Conference","raw_type":null},{"id":"pmh:oai:europepmc.org:11209149","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12398407","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12398407/pdf/nihms-2082836.pdf","source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:12398407","is_oa":true,"landing_page_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12398407/","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proc SIGCHI Conf Hum Factor Comput Syst","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:europepmc.org:11209149","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12398407","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12398407/pdf/nihms-2082836.pdf","source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6287431198","display_name":null,"funder_award_id":"R01EY034562","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337350","display_name":"National Eye Institute","ror":"https://ror.org/03wkg3b53"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409767710.pdf"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W289273921","https://openalex.org/W398859631","https://openalex.org/W569478347","https://openalex.org/W1549672311","https://openalex.org/W1956340063","https://openalex.org/W1978098427","https://openalex.org/W1979702275","https://openalex.org/W2133512280","https://openalex.org/W2295423240","https://openalex.org/W2337252826","https://openalex.org/W2425121537","https://openalex.org/W2477695877","https://openalex.org/W2506483933","https://openalex.org/W2952132648","https://openalex.org/W2963916161","https://openalex.org/W2984008963","https://openalex.org/W2989322838","https://openalex.org/W3023742835","https://openalex.org/W3032743533","https://openalex.org/W3035237998","https://openalex.org/W3038896047","https://openalex.org/W3094385957","https://openalex.org/W3095481265","https://openalex.org/W3104862079","https://openalex.org/W3106697459","https://openalex.org/W3110749958","https://openalex.org/W3162317218","https://openalex.org/W3204588463","https://openalex.org/W3207801264","https://openalex.org/W3216765867","https://openalex.org/W4210758933","https://openalex.org/W4225146386","https://openalex.org/W4226289673","https://openalex.org/W4256048441","https://openalex.org/W4288083805","https://openalex.org/W4293112598","https://openalex.org/W4303614602","https://openalex.org/W4307129979","https://openalex.org/W4307475429","https://openalex.org/W4310895557","https://openalex.org/W4312864639","https://openalex.org/W4313190371","https://openalex.org/W4366590580","https://openalex.org/W4386066385","https://openalex.org/W4386076314","https://openalex.org/W4389519587","https://openalex.org/W4391940741","https://openalex.org/W4393157125","https://openalex.org/W4394625882","https://openalex.org/W4396832346","https://openalex.org/W4401043560","https://openalex.org/W4402671548","https://openalex.org/W4402727885","https://openalex.org/W4402753903","https://openalex.org/W4402754238","https://openalex.org/W4402951675","https://openalex.org/W4403576862","https://openalex.org/W4403780613","https://openalex.org/W4404784276"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Video":[0],"descriptions":[1,22,40,71,116],"are":[2,122,161],"crucial":[3],"for":[4,20,73,93],"blind":[5],"and":[6,65,85,109,121,133,144,159],"low":[7],"vision":[8],"(BLV)":[9],"users":[10],"to":[11,27,69,124],"access":[12],"visual":[13],"content.":[14],"However,":[15],"current":[16],"artificial":[17],"intelligence":[18],"models":[19,63,138],"generating":[21],"often":[23],"fall":[24],"short":[25],"due":[26],"limitations":[28],"in":[29,39,128],"the":[30,83],"quality":[31],"of":[32,89],"human":[33,119,126],"annotations":[34,120,127],"within":[35],"training":[36],"datasets,":[37],"resulting":[38],"that":[41,58,114,148],"do":[42],"not":[43],"fully":[44],"meet":[45],"BLV":[46,74,94,107],"users'":[47],"needs.":[48],"To":[49],"address":[50],"this":[51,77,152],"gap,":[52],"we":[53,79],"introduce":[54],"VideoA11y,":[55],"an":[56],"approach":[57],"leverages":[59],"multimodal":[60],"large":[61],"language":[62],"(MLLMs)":[64],"video":[66,100],"accessibility":[67],"guidelines":[68],"generate":[70],"tailored":[72],"individuals.":[75],"Using":[76],"method,":[78],"have":[80],"curated":[81],"VideoA11y-40K,":[82],"largest":[84],"most":[86],"comprehensive":[87],"dataset":[88,153,160],"40,000":[90],"videos":[91],"described":[92],"users.":[95],"Rigorous":[96],"experiments":[97],"across":[98],"15":[99],"categories,":[101],"involving":[102],"347":[103],"sighted":[104],"participants,":[105,108],"40":[106],"seven":[110],"professional":[111],"describers,":[112],"showed":[113],"VideoA11y":[115],"outperform":[117],"novice":[118],"comparable":[123],"trained":[125],"clarity,":[129],"accuracy,":[130],"objectivity,":[131],"descriptiveness,":[132],"user":[134],"satisfaction.":[135],"We":[136],"evaluated":[137],"on":[139,151],"VideoA11y-40K":[140],"using":[141],"both":[142],"standard":[143],"custom":[145],"metrics,":[146],"demonstrating":[147],"MLLMs":[149],"fine-tuned":[150],"produce":[154],"high-quality":[155],"accessible":[156],"descriptions.":[157],"Code":[158],"available":[162],"at":[163],"https://people-robots.github.io/VideoA11y/.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
