{"id":"https://openalex.org/W3196703988","doi":"https://doi.org/10.1145/3461353.3461361","title":"Chinese description of videos incorporating multimodal features and attention mechanism","display_name":"Chinese description of videos incorporating multimodal features and attention mechanism","publication_year":2021,"publication_date":"2021-03-05","ids":{"openalex":"https://openalex.org/W3196703988","doi":"https://doi.org/10.1145/3461353.3461361","mag":"3196703988"},"language":"en","primary_location":{"id":"doi:10.1145/3461353.3461361","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3461353.3461361","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 the 5th International Conference on Innovation in Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109081571","display_name":"Hu Liu","orcid":"https://orcid.org/0000-0002-6019-2759"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hu Liu","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086403008","display_name":"Junxiu Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junxiu Wu","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100347995","display_name":"Jiabin Yuan","orcid":"https://orcid.org/0000-0002-1537-1440"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiabin Yuan","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics, China","institution_ids":["https://openalex.org/I9842412"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I9842412"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10445691,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"49","last_page":"54"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8497469425201416},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5639387369155884},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5252361297607422},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5064412355422974},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.4531446397304535},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45217376947402954},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.34767013788223267}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8497469425201416},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5639387369155884},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5252361297607422},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5064412355422974},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4531446397304535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45217376947402954},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.34767013788223267},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3461353.3461361","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3461353.3461361","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 the 5th International Conference on Innovation in Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8500000238418579,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1586939924","https://openalex.org/W2106809158","https://openalex.org/W2139501017","https://openalex.org/W2188901757","https://openalex.org/W2192412620","https://openalex.org/W2411037331","https://openalex.org/W2418300416","https://openalex.org/W2618127004","https://openalex.org/W2619947201","https://openalex.org/W2749708282","https://openalex.org/W2807417407","https://openalex.org/W2886381447","https://openalex.org/W2945755204","https://openalex.org/W2962885853","https://openalex.org/W2963524571","https://openalex.org/W2964137974","https://openalex.org/W2979437663","https://openalex.org/W3033014863","https://openalex.org/W3102566412","https://openalex.org/W4200268060","https://openalex.org/W4288083805"],"related_works":["https://openalex.org/W2382997850","https://openalex.org/W2390968135","https://openalex.org/W2382213751","https://openalex.org/W2351750670","https://openalex.org/W1597848696","https://openalex.org/W2354715126","https://openalex.org/W2388563748","https://openalex.org/W2375179084","https://openalex.org/W2366646518","https://openalex.org/W2370906336"],"abstract_inverted_index":{"Video":[0],"description":[1,29,34,49,77,125,135],"is":[2,151],"a":[3,52,96],"hot":[4],"topic":[5],"in":[6,21,173],"the":[7,43,70,108,129,141,147,158,166],"area":[8],"of":[9,46,117,133,157],"computer":[10],"vision":[11],"and":[12,50,64,91,127,165],"natural":[13],"language":[14],"processing,":[15],"which":[16,58],"has":[17,169],"made":[18],"remarkable":[19],"achievements":[20],"recent":[22],"years.":[23],"But":[24],"most":[25],"researches":[26],"on":[27,37,69,102,112,161],"video":[28,47,55,76,104,124],"are":[30],"to":[31],"generate":[32],"English":[33,119,174],"while":[35],"few":[36],"Chinese":[38,48,56,83,88,110,115,134,148,163],"description.":[39,105],"This":[40],"paper":[41],"explores":[42],"generation":[44],"process":[45],"proposes":[51],"model":[53,78,111,149,167],"for":[54,99],"description,":[57],"introduces":[59],"three":[60],"complementary":[61],"modal":[62],"features":[63],"temporal":[65],"attention":[66],"mechanism":[67],"based":[68],"general":[71],"encoder-decoder":[72],"framework.":[73],"The":[74],"optimized":[75],"combined":[79],"with":[80],"an":[81,113,170],"appropriate":[82],"preprocessing":[84],"method":[85],"further":[86],"improves":[87],"descriptions'":[89],"richness":[90],"accuracy.":[92],"These":[93],"works":[94],"provide":[95],"valuable":[97],"reference":[98],"future":[100],"research":[101],"multilingual":[103],"We":[106],"tested":[107],"proposed":[109,150],"expanded":[114],"corpus":[116],"standard":[118],"dataset":[120],"MSVD":[121],"(Microsoft":[122],"Research":[123],"corpus)":[126],"studied":[128],"special":[130],"processing":[131],"methods":[132],"generation.":[136],"Experimental":[137],"results":[138],"show":[139],"that":[140,156],"highest":[142],"METEOR":[143],"value":[144],"obtained":[145],"by":[146],"still":[152],"6.6%":[153],"higher":[154],"than":[155],"best":[159],"result":[160,172],"MSVD's":[162],"corpus,":[164],"also":[168],"advanced":[171],"environment.":[175]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
