{"id":"https://openalex.org/W2809191921","doi":"https://doi.org/10.1109/tkde.2018.2848260","title":"Read, Watch, Listen, and Summarize: Multi-Modal Summarization for Asynchronous Text, Image, Audio and Video","display_name":"Read, Watch, Listen, and Summarize: Multi-Modal Summarization for Asynchronous Text, Image, Audio and Video","publication_year":2018,"publication_date":"2018-06-22","ids":{"openalex":"https://openalex.org/W2809191921","doi":"https://doi.org/10.1109/tkde.2018.2848260","mag":"2809191921"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2018.2848260","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2018.2848260","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100327720","display_name":"Haoran Li","orcid":"https://orcid.org/0000-0002-2368-7541"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoran Li","raw_affiliation_strings":["National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015809194","display_name":"Junnan Zhu","orcid":"https://orcid.org/0000-0002-9856-2946"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junnan Zhu","raw_affiliation_strings":["National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101810188","display_name":"Cong Ma","orcid":"https://orcid.org/0000-0002-9787-6273"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cong Ma","raw_affiliation_strings":["National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100319572","display_name":"Jiajun Zhang","orcid":"https://orcid.org/0000-0001-5293-7434"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiajun Zhang","raw_affiliation_strings":["National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015785439","display_name":"Chengqing Zong","orcid":"https://orcid.org/0000-0002-9864-3818"},"institutions":[{"id":"https://openalex.org/I4210157642","display_name":"Institute of Automation","ror":"https://ror.org/056qj1t15","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210157642","https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Chengqing Zong","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation","institution_ids":["https://openalex.org/I4210157642"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100327720"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210112150"],"apc_list":null,"apc_paid":null,"fwci":2.7715,"has_fulltext":false,"cited_by_count":75,"citation_normalized_percentile":{"value":0.93235783,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"31","issue":"5","first_page":"996","last_page":"1009"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9064253568649292},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.8450528383255005},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48638013005256653},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46040499210357666},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.4478069245815277},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4476286470890045},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.391653835773468},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.35880863666534424}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9064253568649292},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.8450528383255005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48638013005256653},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46040499210357666},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.4478069245815277},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4476286470890045},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.391653835773468},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.35880863666534424},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2018.2848260","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2018.2848260","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6600000262260437}],"awards":[{"id":"https://openalex.org/G2204133011","display_name":null,"funder_award_id":"61333018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6753692345","display_name":null,"funder_award_id":"61673380","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W174630521","https://openalex.org/W939280201","https://openalex.org/W1520857482","https://openalex.org/W1525595230","https://openalex.org/W1574901103","https://openalex.org/W1651093245","https://openalex.org/W1686810756","https://openalex.org/W1861492603","https://openalex.org/W1880262756","https://openalex.org/W1943253508","https://openalex.org/W1962684803","https://openalex.org/W1973010836","https://openalex.org/W1973894278","https://openalex.org/W1981974552","https://openalex.org/W1982153296","https://openalex.org/W1993078493","https://openalex.org/W1997164082","https://openalex.org/W2008129036","https://openalex.org/W2026012689","https://openalex.org/W2046059962","https://openalex.org/W2056935845","https://openalex.org/W2060724430","https://openalex.org/W2063113600","https://openalex.org/W2066636486","https://openalex.org/W2074051546","https://openalex.org/W2080379754","https://openalex.org/W2093351881","https://openalex.org/W2093541376","https://openalex.org/W2097241552","https://openalex.org/W2097606805","https://openalex.org/W2105395363","https://openalex.org/W2110693578","https://openalex.org/W2127615881","https://openalex.org/W2128618367","https://openalex.org/W2129291408","https://openalex.org/W2144933361","https://openalex.org/W2148086098","https://openalex.org/W2148374900","https://openalex.org/W2150824314","https://openalex.org/W2151170651","https://openalex.org/W2155871583","https://openalex.org/W2157023096","https://openalex.org/W2158847908","https://openalex.org/W2159940559","https://openalex.org/W2168154661","https://openalex.org/W2185175083","https://openalex.org/W2197590357","https://openalex.org/W2250361277","https://openalex.org/W2250539671","https://openalex.org/W2251332263","https://openalex.org/W2259890326","https://openalex.org/W2334889010","https://openalex.org/W2335163676","https://openalex.org/W2365919995","https://openalex.org/W2461084364","https://openalex.org/W2463955103","https://openalex.org/W2529165750","https://openalex.org/W2547411571","https://openalex.org/W2558173469","https://openalex.org/W2595558070","https://openalex.org/W2611926148","https://openalex.org/W2759570332","https://openalex.org/W2765440071","https://openalex.org/W2963389687","https://openalex.org/W3101913037","https://openalex.org/W4231510805","https://openalex.org/W6607104753","https://openalex.org/W6624607791","https://openalex.org/W6631167871","https://openalex.org/W6637231022","https://openalex.org/W6637373629","https://openalex.org/W6639102338","https://openalex.org/W6639619044","https://openalex.org/W6641263247","https://openalex.org/W6665613779","https://openalex.org/W6674832899","https://openalex.org/W6678892181","https://openalex.org/W6681106510","https://openalex.org/W6682983835","https://openalex.org/W6687459197","https://openalex.org/W6785421358","https://openalex.org/W7015831105"],"related_works":["https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W2091301346","https://openalex.org/W3148229873","https://openalex.org/W2150160875","https://openalex.org/W4242223894","https://openalex.org/W1517524280","https://openalex.org/W4323520239","https://openalex.org/W2028097510","https://openalex.org/W2505877856"],"abstract_inverted_index":{"Automatic":[0],"text":[1,17,136],"summarization":[2,34],"is":[3,86],"a":[4,15,19,140,172,193],"fundamental":[5],"natural":[6],"language":[7],"processing":[8],"(NLP)":[9],"application":[10],"that":[11,54,210],"aims":[12],"to":[13,65,75,87,111,117,170],"condense":[14],"source":[16],"into":[18],"shorter":[20],"version.":[21],"The":[22,83,202],"rapid":[23],"increase":[24],"in":[25,71,101,198],"multimedia":[26,80],"data":[27,73],"transmission":[28],"over":[29],"the":[30,56,67,77,89,102,119,122,132,146,149,165,177,184],"Internet":[31],"necessitates":[32],"multi-modal":[33,72,93,160,166],"(MMS)":[35],"from":[36],"asynchronous":[37],"collections":[38],"of":[39,58,79,121,135,148,187],"text,":[40],"image,":[41],"audio,":[42],"and":[43,62,74,96,116,137,181,200,217],"video.":[44,103],"In":[45],"this":[46],"work,":[47],"we":[48,107,130,144],"propose":[49],"an":[50,109],"extractive":[51],"MMS":[52,196],"method":[53],"unites":[55],"techniques":[57],"NLP,":[59],"speech":[60],"processing,":[61],"computer":[63],"vision":[64],"explore":[66],"rich":[68],"information":[69,155],"contained":[70],"improve":[76],"quality":[78],"news":[81],"summarization.":[82],"key":[84],"idea":[85],"bridge":[88],"semantic":[90],"gaps":[91],"between":[92],"content.":[94],"Audio":[95],"visual":[97,128,154],"are":[98,168],"main":[99],"modalities":[100],"For":[104,127],"audio":[105,125],"information,":[106,129],"design":[108],"approach":[110],"selectively":[112],"use":[113],"its":[114],"transcription":[115,123],"infer":[118],"salience":[120],"with":[124],"signals.":[126],"learn":[131],"joint":[133],"representations":[134],"images":[138],"using":[139],"neural":[141],"network.":[142],"Then,":[143],"capture":[145],"coverage":[147,182],"generated":[150],"summary":[151,174],"for":[152],"important":[153],"through":[156,183],"text-image":[157],"matching":[158,216],"or":[159],"topic":[161,219],"modeling.":[162],"Finally,":[163],"all":[164],"aspects":[167],"considered":[169],"generate":[171],"textual":[173],"by":[175],"maximizing":[176],"salience,":[178],"non-redundancy,":[179],"readability,":[180],"budgeted":[185],"optimization":[186],"submodular":[188],"functions.":[189],"We":[190],"further":[191],"introduce":[192],"publicly":[194],"available":[195],"corpus":[197],"English":[199],"Chinese.1":[201],"experimental":[203],"results":[204],"obtained":[205],"on":[206,214],"our":[207,211],"dataset":[208],"demonstrate":[209],"methods":[212],"based":[213],"image":[215,218],"framework":[220],"outperform":[221],"other":[222],"competitive":[223],"baseline":[224],"methods.":[225]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
