{"id":"https://openalex.org/W2526544345","doi":"https://doi.org/10.1145/2964284.2967298","title":"Boosting Video Description Generation by Explicitly Translating from Frame-Level Captions","display_name":"Boosting Video Description Generation by Explicitly Translating from Frame-Level Captions","publication_year":2016,"publication_date":"2016-09-29","ids":{"openalex":"https://openalex.org/W2526544345","doi":"https://doi.org/10.1145/2964284.2967298","mag":"2526544345"},"language":"en","primary_location":{"id":"doi:10.1145/2964284.2967298","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2964284.2967298","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM international conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100390819","display_name":"Yuan Liu","orcid":"https://orcid.org/0000-0001-7514-8876"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yuan Liu","raw_affiliation_strings":["Ricoh Software Research Center (Beijing) Co., Ltd., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ricoh Software Research Center (Beijing) Co., Ltd., Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101873354","display_name":"Zhongchao Shi","orcid":"https://orcid.org/0000-0002-5216-3827"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhongchao Shi","raw_affiliation_strings":["Ricoh Software Research Center (Beijing) Co., Ltd., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Ricoh Software Research Center (Beijing) Co., Ltd., Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100390819"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.67,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.8944065,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"631","last_page":"634"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9263945817947388},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8797374963760376},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.731096088886261},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6590707302093506},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6498490571975708},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5517909526824951},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5459016561508179},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.5399678945541382},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.45234715938568115},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4356299936771393},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3561500906944275},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3257848620414734},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.2690083980560303},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.20033958554267883}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9263945817947388},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8797374963760376},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.731096088886261},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6590707302093506},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6498490571975708},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5517909526824951},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5459016561508179},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.5399678945541382},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.45234715938568115},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4356299936771393},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3561500906944275},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3257848620414734},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2690083980560303},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.20033958554267883},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2964284.2967298","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2964284.2967298","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM international conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1573040851","https://openalex.org/W1586939924","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W2064675550","https://openalex.org/W2097117768","https://openalex.org/W2117539524","https://openalex.org/W2133459682","https://openalex.org/W2139501017","https://openalex.org/W2164290393","https://openalex.org/W2425121537","https://openalex.org/W2949888546","https://openalex.org/W2962835968","https://openalex.org/W2964241990","https://openalex.org/W2964308564"],"related_works":["https://openalex.org/W2541791370","https://openalex.org/W2035976912","https://openalex.org/W4361193378","https://openalex.org/W2109974539","https://openalex.org/W2738084969","https://openalex.org/W2125927971","https://openalex.org/W2954664659","https://openalex.org/W4200486724","https://openalex.org/W1999222583","https://openalex.org/W2042311553"],"abstract_inverted_index":{"Automatically":[0],"describing":[1],"video":[2,71,122,200],"content":[3],"with":[4,185],"natural":[5],"language":[6],"is":[7,22,180],"a":[8,144,156,173],"fundamental":[9],"challenge":[10],"of":[11,64,70,138,164],"computer":[12],"vision.":[13],"The":[14,27,107],"recent":[15],"advanced":[16],"technique":[17],"that":[18,78,158,195],"approaches":[19],"this":[20,76],"problem":[21],"Recurrent":[23],"Neural":[24],"Networks":[25],"(RNN).":[26],"need":[28],"to":[29,117,147,150,182,211],"train":[30],"RNN":[31],"on":[32,113,199],"large-scale":[33],"complex":[34],"and":[35,38,48,96,167,202],"diverse":[36],"videos":[37],"their":[39],"associated":[40],"language,":[41],"however,":[42],"makes":[43],"the":[44,52,68,79,89,110,125,129,136,159,162],"task":[45],"human-labeling":[46],"intensive":[47],"computationally":[49],"expensive.":[50],"Moreover,":[51],"results":[53],"can":[54,83],"suffer":[55],"from":[56,93,208],"robustness":[57],"problem,":[58],"especially":[59],"when":[60],"there":[61],"are":[62,133,161,196],"rich":[63],"temporal":[65],"dynamics":[66],"in":[67,75,155],"sequence":[69,146,148],"frames.":[72],"We":[73],"demonstrate":[74],"paper":[77],"above":[80],"two":[81],"limitations":[82],"be":[84,183],"mitigated":[85],"by":[86,100],"jointly":[87],"exploring":[88],"largely":[90],"available":[91],"data":[92,201],"image":[94,114],"domain":[95],"representing":[97],"each":[98,121,139],"frame":[99],"high-level":[101],"attributes":[102,137],"rather":[103],"than":[104],"visual":[105,209],"features.":[106],"former":[108],"leverages":[109],"learnt":[111],"models":[112,204],"captioning":[115],"benchmark":[116],"generate":[118,151],"caption":[119],"for":[120,153],"frame,":[123],"while":[124],"latter":[126],"explicitly":[127],"incorporates":[128],"obtained":[130],"captions":[131,163],"which":[132,205],"regarded":[134],"as":[135],"frame.":[140],"Specifically,":[141],"we":[142],"propose":[143],"novel":[145],"architecture":[149],"descriptions":[152],"videos,":[154],"sense":[157],"inputs":[160],"sequential":[165],"frames":[166],"it":[168],"outputs":[169],"words":[170],"sequentially.":[171],"On":[172],"widely":[174],"used":[175],"YouTube2Text":[176],"dataset,":[177],"our":[178],"proposal":[179],"shown":[181],"powerful":[184],"superior":[186],"performance":[187],"over":[188],"several":[189],"state-of-the-art":[190],"methods":[191],"including":[192],"both":[193],"architectures":[194],"purely":[197],"developed":[198],"RNN-based":[203],"translate":[206],"directly":[207],"features":[210],"language.":[212]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
