{"id":"https://openalex.org/W2752191396","doi":"https://doi.org/10.1145/3123266.3123420","title":"Video Captioning with Guidance of Multimodal Latent Topics","display_name":"Video Captioning with Guidance of Multimodal Latent Topics","publication_year":2017,"publication_date":"2017-10-20","ids":{"openalex":"https://openalex.org/W2752191396","doi":"https://doi.org/10.1145/3123266.3123420","mag":"2752191396"},"language":"en","primary_location":{"id":"doi:10.1145/3123266.3123420","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3123266.3123420","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM international conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101776086","display_name":"Shizhe Chen","orcid":"https://orcid.org/0000-0002-7313-9703"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shizhe Chen","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100416039","display_name":"Jia Chen","orcid":"https://orcid.org/0009-0005-0957-1744"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jia Chen","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009985839","display_name":"Qin Jin","orcid":"https://orcid.org/0000-0001-6486-6020"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Jin","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103099928","display_name":"Alexander G. Hauptmann","orcid":"https://orcid.org/0000-0003-2123-0684"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Hauptmann","raw_affiliation_strings":["Carnegie Mellen University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellen University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101776086"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":3.1858,"has_fulltext":false,"cited_by_count":66,"citation_normalized_percentile":{"value":0.95367378,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1838","last_page":"1846"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9655312299728394},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8530358076095581},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7163567543029785},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6948709487915039},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.5914183855056763},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5699038505554199},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5221238732337952},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5159540772438049},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.48843327164649963},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4497895836830139},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4403385818004608},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.18102723360061646}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9655312299728394},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8530358076095581},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7163567543029785},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6948709487915039},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.5914183855056763},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5699038505554199},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5221238732337952},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5159540772438049},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.48843327164649963},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4497895836830139},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4403385818004608},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.18102723360061646},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3123266.3123420","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3123266.3123420","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM international conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6600000262260437,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1573040851","https://openalex.org/W1586939924","https://openalex.org/W1828348983","https://openalex.org/W1880262756","https://openalex.org/W1895577753","https://openalex.org/W1956340063","https://openalex.org/W1964442016","https://openalex.org/W1966385142","https://openalex.org/W1986007546","https://openalex.org/W2055132753","https://openalex.org/W2064675550","https://openalex.org/W2066134726","https://openalex.org/W2101105183","https://openalex.org/W2120190345","https://openalex.org/W2130942839","https://openalex.org/W2133459682","https://openalex.org/W2133564696","https://openalex.org/W2134797427","https://openalex.org/W2139501017","https://openalex.org/W2142900973","https://openalex.org/W2148154194","https://openalex.org/W2156387975","https://openalex.org/W2161000554","https://openalex.org/W2274287116","https://openalex.org/W2411037331","https://openalex.org/W2425121537","https://openalex.org/W2467684189","https://openalex.org/W2507365558","https://openalex.org/W2523993696","https://openalex.org/W2526486375","https://openalex.org/W2527349934","https://openalex.org/W2544271936","https://openalex.org/W2560313346","https://openalex.org/W2607119937","https://openalex.org/W2618127004","https://openalex.org/W2950178297","https://openalex.org/W2952881492","https://openalex.org/W2953022248","https://openalex.org/W2963293463","https://openalex.org/W2963576560","https://openalex.org/W2963843052","https://openalex.org/W2964241990","https://openalex.org/W2964350391"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4399485860"],"abstract_inverted_index":{"The":[0,136,171,183],"topic":[1,73,96,106,121,137],"diversity":[2],"of":[3,75,134,198],"open-domain":[4],"videos":[5,76],"leads":[6],"to":[7,56,100,117,126,142,157],"various":[8],"vocabularies":[9],"and":[10,17,47,66,69,161,177,192,214],"linguistic":[11],"expressions":[12],"in":[13,42,89,98],"describing":[14],"video":[15,21,163],"contents,":[16],"therefore,":[18],"makes":[19],"the":[20,49,59,72,80,101,105,111,115,128,143,148,166,190,196],"captioning":[22],"task":[23],"even":[24],"more":[25,64,159],"challenging.":[26],"In":[27],"this":[28],"paper,":[29],"we":[30,91,109,151],"propose":[31,152],"an":[32],"unified":[33],"caption":[34,50,82,102,149],"framework,":[35],"M&M":[36,202],"TGM,":[37],"which":[38,90,124],"mines":[39],"multimodal":[40,61,132],"topics":[41,62,113,130],"unsupervised":[43],"fashion":[44],"from":[45,131,168,185],"data":[46],"guides":[48],"decoder":[51,156],"with":[52,165],"these":[53],"topics.":[54,170],"Compared":[55],"pre-defined":[57],"topics,":[58],"mined":[60,112],"are":[63],"semantically":[65],"visually":[67],"coherent":[68],"can":[70],"reflect":[71],"distribution":[74],"better.":[77],"We":[78],"formulate":[79],"topic-aware":[81,155],"generation":[83],"as":[84,114],"a":[85,93,119,153],"multi-task":[86],"learning":[87,144,173],"problem,":[88],"add":[92],"parallel":[94],"task,":[95,108,150],"prediction,":[97],"addition":[99],"task.":[103],"For":[104],"prediction":[107,122,138],"use":[110],"teacher":[116],"train":[118],"student":[120],"model,":[123],"learns":[125],"predict":[127],"latent":[129,169],"contents":[133],"videos.":[135],"provides":[139],"intermediate":[140],"supervision":[141],"process.":[145],"As":[146],"for":[147],"novel":[154],"generate":[158],"accurate":[160],"detailed":[162],"descriptions":[164],"guidance":[167],"entire":[172],"procedure":[174],"is":[175],"end-to-end":[176],"it":[178],"optimizes":[179],"both":[180,216],"tasks":[181],"simultaneously.":[182],"results":[184],"extensive":[186],"experiments":[187],"conducted":[188],"on":[189,210,215],"MSR-VTT":[191],"Youtube2Text":[193],"datasets":[194],"demonstrate":[195],"effectiveness":[197],"our":[199],"proposed":[200],"model.":[201],"TGM":[203],"not":[204],"only":[205],"outperforms":[206],"prior":[207],"state-of-the-art":[208],"methods":[209],"multiple":[211],"evaluation":[212],"metrics":[213],"benchmark":[217],"datasets,":[218],"but":[219],"also":[220],"achieves":[221],"better":[222],"generalization":[223],"ability.":[224]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":19},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
