{"id":"https://openalex.org/W3160237369","doi":"https://doi.org/10.1145/3411764.3445347","title":"Toward Automatic Audio Description Generation for Accessible Videos","display_name":"Toward Automatic Audio Description Generation for Accessible Videos","publication_year":2021,"publication_date":"2021-05-06","ids":{"openalex":"https://openalex.org/W3160237369","doi":"https://doi.org/10.1145/3411764.3445347","mag":"3160237369"},"language":"en","primary_location":{"id":"doi:10.1145/3411764.3445347","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3411764.3445347","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100767801","display_name":"Yujia Wang","orcid":"https://orcid.org/0000-0002-6733-4967"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I202334528","display_name":"Beijing Electronic Science and Technology Institute","ror":"https://ror.org/01xdzh226","country_code":"CN","type":"education","lineage":["https://openalex.org/I202334528"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yujia Wang","raw_affiliation_strings":["Computer Science Beijing Institute of Technology, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I202334528","https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112277676","display_name":"Wei Liang","orcid":"https://orcid.org/0000-0002-4730-017X"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Liang","raw_affiliation_strings":["School of Computer Science Beijing Institute of Technology, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016202446","display_name":"Haikun Huang","orcid":"https://orcid.org/0000-0002-5962-0533"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haikun Huang","raw_affiliation_strings":["Computer Science Department George Mason University, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department George Mason University, United States","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045112676","display_name":"Yongqi Zhang","orcid":"https://orcid.org/0000-0001-8013-847X"},"institutions":[{"id":"https://openalex.org/I33434090","display_name":"University of Massachusetts Boston","ror":"https://ror.org/04ydmy275","country_code":"US","type":"education","lineage":["https://openalex.org/I33434090"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongqi Zhang","raw_affiliation_strings":["Computer Science Department University of Massachusetts Boston, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department University of Massachusetts Boston, United States","institution_ids":["https://openalex.org/I33434090"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038972102","display_name":"Dingzeyu Li","orcid":"https://orcid.org/0000-0002-4222-8105"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dingzeyu Li","raw_affiliation_strings":["Adobe Research, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research, United States","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084967276","display_name":"Lap-Fai Yu","orcid":"https://orcid.org/0000-0002-2656-5654"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lap-Fai Yu","raw_affiliation_strings":["Computer Science George Mason University, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science George Mason University, United States","institution_ids":["https://openalex.org/I162714631"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100767801"],"corresponding_institution_ids":["https://openalex.org/I125839683","https://openalex.org/I202334528"],"apc_list":null,"apc_paid":null,"fwci":51.2207,"has_fulltext":false,"cited_by_count":84,"citation_normalized_percentile":{"value":0.99882122,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7884961366653442},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3548456132411957},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3426954746246338},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3320053219795227}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7884961366653442},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3548456132411957},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3426954746246338},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3320053219795227}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3411764.3445347","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3411764.3445347","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W27089656","https://openalex.org/W43248191","https://openalex.org/W245720487","https://openalex.org/W340809548","https://openalex.org/W1522734439","https://openalex.org/W1559036573","https://openalex.org/W1573040851","https://openalex.org/W1584238570","https://openalex.org/W2009029255","https://openalex.org/W2027317161","https://openalex.org/W2032337854","https://openalex.org/W2041992741","https://openalex.org/W2139501017","https://openalex.org/W2153579005","https://openalex.org/W2409683191","https://openalex.org/W2414001829","https://openalex.org/W2521985973","https://openalex.org/W2522595994","https://openalex.org/W2556388456","https://openalex.org/W2578879311","https://openalex.org/W2587602959","https://openalex.org/W2597474111","https://openalex.org/W2605035112","https://openalex.org/W2619697695","https://openalex.org/W2743160829","https://openalex.org/W2748163013","https://openalex.org/W2752172973","https://openalex.org/W2786685006","https://openalex.org/W2791213089","https://openalex.org/W2891955747","https://openalex.org/W2913293259","https://openalex.org/W2914699769","https://openalex.org/W2941092099","https://openalex.org/W2962756039","https://openalex.org/W2962799512","https://openalex.org/W2963351113","https://openalex.org/W2963524571","https://openalex.org/W2963644595","https://openalex.org/W2963753226","https://openalex.org/W2963916161","https://openalex.org/W2979157532","https://openalex.org/W2979826702","https://openalex.org/W2980282514","https://openalex.org/W2981800437","https://openalex.org/W2982619606","https://openalex.org/W2983429428","https://openalex.org/W2986670728","https://openalex.org/W2998704965","https://openalex.org/W2998746484","https://openalex.org/W3029105705","https://openalex.org/W3093257790","https://openalex.org/W3095951396","https://openalex.org/W3104915307","https://openalex.org/W4288083805","https://openalex.org/W6680532216"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Video":[0],"accessibility":[1,39],"is":[2,13],"essential":[3],"for":[4,124],"people":[5],"with":[6,89,110],"visual":[7],"impairments.":[8],"Audio":[9],"descriptions":[10,27],"describe":[11],"what":[12],"happening":[14],"on-screen,":[15],"e.g.,":[16],"physical":[17],"actions,":[18],"facial":[19],"expressions,":[20],"and":[21,53,70,93,113],"scene":[22],"changes.":[23],"Generating":[24],"high-quality":[25],"audio":[26,56,106,129],"requires":[28],"a":[29,43,51],"lot":[30],"of":[31,50,61,77,83,127],"manual":[32],"description":[33,107,130],"generation":[34,131],"[50].":[35],"To":[36],"address":[37],"this":[38],"obstacle,":[40],"we":[41,121],"built":[42],"system":[44,59,79],"that":[45],"analyzes":[46],"the":[47,55,75,125],"audiovisual":[48],"contents":[49],"video":[52,114],"generates":[54],"descriptions.":[57],"The":[58],"consisted":[60],"three":[62],"modules:":[63],"AD":[64,68,71],"insertion":[65],"time":[66],"prediction,":[67],"generation,":[69],"optimization.":[72],"We":[73],"evaluated":[74],"quality":[76],"our":[78,118],"on":[80,117],"five":[81],"types":[82,112],"videos":[84],"by":[85],"conducting":[86],"qualitative":[87],"studies":[88],"20":[90],"sighted":[91],"users":[92,95],"12":[94],"who":[96],"were":[97],"blind":[98],"or":[99],"visually":[100],"impaired.":[101],"Our":[102],"findings":[103],"revealed":[104],"how":[105],"preferences":[108],"varied":[109],"user":[111],"types.":[115],"Based":[116],"study\u2019s":[119],"analysis,":[120],"provided":[122],"recommendations":[123],"development":[126],"future":[128],"technologies.":[132]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":26},{"year":2024,"cited_by_count":26},{"year":2023,"cited_by_count":17},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":5}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
