{"id":"https://openalex.org/W4402352898","doi":"https://doi.org/10.1109/ijcnn60899.2024.10651189","title":"KD-VSUM: A Vision Guided Models for Multimodal Abstractive Summarization with Knowledge Distillation","display_name":"KD-VSUM: A Vision Guided Models for Multimodal Abstractive Summarization with Knowledge Distillation","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402352898","doi":"https://doi.org/10.1109/ijcnn60899.2024.10651189"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10651189","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10651189","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110945421","display_name":"Zehong Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zehong Zheng","raw_affiliation_strings":["East China Normal University,Shanghai,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,Shanghai,China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100616448","display_name":"Changlong Li","orcid":"https://orcid.org/0000-0002-4042-6538"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changlong Li","raw_affiliation_strings":["East China Normal University,Shanghai,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,Shanghai,China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004454415","display_name":"Wenxin Hu","orcid":"https://orcid.org/0009-0002-7887-8502"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenxin Hu","raw_affiliation_strings":["East China Normal University,Shanghai,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,Shanghai,China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100463044","display_name":"Su Wang","orcid":"https://orcid.org/0000-0001-5460-1707"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Su Wang","raw_affiliation_strings":["East China Normal University,Shanghai,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,Shanghai,China","institution_ids":["https://openalex.org/I66867065"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12276789,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.9291471242904663},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6983926892280579},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6890524625778198},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5028361678123474},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4659743309020996},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.35696858167648315},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33829718828201294},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.14415764808654785},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.0762602686882019}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.9291471242904663},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6983926892280579},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6890524625778198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5028361678123474},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4659743309020996},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35696858167648315},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33829718828201294},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.14415764808654785},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0762602686882019}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10651189","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/ijcnn60899.2024.10651189","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322370","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1690739335","https://openalex.org/W1821462560","https://openalex.org/W1902237438","https://openalex.org/W1956340063","https://openalex.org/W1965555277","https://openalex.org/W1983719983","https://openalex.org/W2052791563","https://openalex.org/W2101105183","https://openalex.org/W2150824314","https://openalex.org/W2194775991","https://openalex.org/W2561238782","https://openalex.org/W2606974598","https://openalex.org/W2759570332","https://openalex.org/W2794791688","https://openalex.org/W2890721473","https://openalex.org/W2891444075","https://openalex.org/W2899274165","https://openalex.org/W2952132648","https://openalex.org/W2962934715","https://openalex.org/W2963736842","https://openalex.org/W3034999214","https://openalex.org/W3104210310","https://openalex.org/W3152798676","https://openalex.org/W3166396011","https://openalex.org/W3200369555","https://openalex.org/W3211495814","https://openalex.org/W4288089799","https://openalex.org/W4312614039","https://openalex.org/W6631190155","https://openalex.org/W6637551013","https://openalex.org/W6638523607","https://openalex.org/W6677328238","https://openalex.org/W6730179637","https://openalex.org/W6749737040","https://openalex.org/W6755559483","https://openalex.org/W6769627184","https://openalex.org/W6780226713","https://openalex.org/W6791353385","https://openalex.org/W6793759846"],"related_works":["https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W2091301346","https://openalex.org/W3148229873","https://openalex.org/W4389760904","https://openalex.org/W2150160875","https://openalex.org/W4242223894","https://openalex.org/W4306886878","https://openalex.org/W1517524280","https://openalex.org/W4323520239"],"abstract_inverted_index":{"Multimodal":[0],"abstract":[1,31,48],"summarization":[2,32,93],"is":[3],"increasingly":[4],"attracting":[5],"attention":[6],"due":[7],"to":[8,11,71,98,120,143,166,216],"its":[9],"ability":[10,70],"synthesize":[12],"information":[13,42,129],"from":[14,43,138],"different":[15],"source":[16],"modalities":[17],"and":[18,46,127,160,193,211],"generate":[19],"high-quality":[20],"text":[21],"summaries.":[22,49],"Concurrently,":[23],"there":[24],"has":[25],"been":[26],"significant":[27],"development":[28],"in":[29,78,107,188,191,195],"multimodal":[30,44,91,139],"models":[33,37,142,178],"for":[34,90],"videos.":[35,168],"These":[36,197],"are":[38],"capable":[39],"of":[40,75,102,130,157,164,186,206,219],"extracting":[41],"data":[45],"generating":[47],"Most":[50],"existing":[51,176],"modeling":[52],"approaches":[53],"primarily":[54],"concentrate":[55],"on":[56,123,135,179],"instructional":[57],"videos,":[58],"such":[59],"as":[60],"those":[61],"teaching":[62],"sports":[63],"or":[64],"life":[65],"skills,":[66],"thereby":[67],"limiting":[68],"their":[69],"capture":[72],"the":[73,79,100,118,124,149,180,200,204,217],"complexity":[74],"dynamic":[76],"environments":[77],"general":[80],"world.":[81],"In":[82],"this":[83],"paper,":[84],"we":[85],"propose":[86],"a":[87,113,154,161,207],"vision-guided":[88,114],"model":[89,119,145,174],"abstractive":[92],"with":[94],"knowledge":[95,136,212],"distillation":[96,137,213],"KD-VSUM":[97],"address":[99],"lack":[101],"generalized":[103],"video":[104,108,131,158,220],"domain":[105],"capabilities":[106],"summarization.":[109],"This":[110],"approach":[111],"includes":[112,153],"encoder,":[115],"which":[116,152],"enables":[117],"better":[121],"focus":[122],"global":[125,208],"spatial":[126],"temporal":[128],"frames.":[132],"We":[133,147],"capitalize":[134],"pre-trained":[140],"video-language":[141],"enhance":[144],"performance.":[146],"introduce":[148],"VersaVision":[150,181],"dataset,":[151,182],"broader":[155],"range":[156],"domains":[159],"higher":[162],"proportion":[163],"medium":[165],"long":[167],"The":[169],"results":[170],"demonstrate":[171],"that":[172,203],"our":[173],"surpasses":[175],"state-of-the-art":[177],"achieving":[183],"ROUGE":[184],"scores":[185],"1.7":[187],"ROUGE-1,":[189],"1.8":[190],"ROUGE-2,":[192],"2":[194],"ROUGE-L.":[196],"findings":[198],"underscore":[199],"substantial":[201],"improvements":[202],"integration":[205],"vision":[209],"guided":[210],"can":[214],"bring":[215],"task":[218],"summary":[221],"extraction.":[222]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
