{"id":"https://openalex.org/W2799315352","doi":"https://doi.org/10.1117/12.2310071","title":"From image captioning to video summary using deep recurrent networks and unsupervised segmentation","display_name":"From image captioning to video summary using deep recurrent networks and unsupervised segmentation","publication_year":2018,"publication_date":"2018-04-13","ids":{"openalex":"https://openalex.org/W2799315352","doi":"https://doi.org/10.1117/12.2310071","mag":"2799315352"},"language":"en","primary_location":{"id":"doi:10.1117/12.2310071","is_oa":false,"landing_page_url":"https://doi.org/10.1117/12.2310071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Tenth International Conference on Machine Vision (ICMV 2017)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070444955","display_name":"Camelia Lemnaru","orcid":"https://orcid.org/0000-0002-4901-9808"},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Camelia Lemnaru","raw_affiliation_strings":["Technical Univ. of Cluj-Napoca (Romania)"],"affiliations":[{"raw_affiliation_string":"Technical Univ. of Cluj-Napoca (Romania)","institution_ids":["https://openalex.org/I158333966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079467248","display_name":"Bogdan Moro\u0219anu","orcid":"https://orcid.org/0009-0008-7416-1161"},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Bogdan Morosanu","raw_affiliation_strings":["Technical Univ. of Cluj-Napoca (Romania)"],"affiliations":[{"raw_affiliation_string":"Technical Univ. of Cluj-Napoca (Romania)","institution_ids":["https://openalex.org/I158333966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5070444955"],"corresponding_institution_ids":["https://openalex.org/I158333966"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03007999,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"1","issue":null,"first_page":"67","last_page":"67"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8779759407043457},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8348884582519531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6566851139068604},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.6218331456184387},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6089568138122559},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5367575287818909},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4969337284564972},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48477834463119507},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4798699915409088},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.46632152795791626},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4603898227214813},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4284844398498535},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38644617795944214}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8779759407043457},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8348884582519531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6566851139068604},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.6218331456184387},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6089568138122559},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5367575287818909},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4969337284564972},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48477834463119507},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4798699915409088},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.46632152795791626},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4603898227214813},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4284844398498535},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38644617795944214},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1117/12.2310071","is_oa":false,"landing_page_url":"https://doi.org/10.1117/12.2310071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Tenth International Conference on Machine Vision (ICMV 2017)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1570212550","https://openalex.org/W1686810756","https://openalex.org/W1813659000","https://openalex.org/W1847088711","https://openalex.org/W1861492603","https://openalex.org/W1965555277","https://openalex.org/W2064675550","https://openalex.org/W2097117768","https://openalex.org/W2099057450","https://openalex.org/W2117539524","https://openalex.org/W2146950091","https://openalex.org/W2194775991","https://openalex.org/W2463955103","https://openalex.org/W2592463526","https://openalex.org/W2951183276","https://openalex.org/W2964201867","https://openalex.org/W3125923133","https://openalex.org/W4240935049","https://openalex.org/W4294170691","https://openalex.org/W4298289240","https://openalex.org/W6634246325","https://openalex.org/W6637373629","https://openalex.org/W6637568146","https://openalex.org/W6638304892","https://openalex.org/W6638824847","https://openalex.org/W6674887261","https://openalex.org/W6674914833","https://openalex.org/W6682691769","https://openalex.org/W6684813833","https://openalex.org/W6687483927","https://openalex.org/W6719092668","https://openalex.org/W6732249622","https://openalex.org/W6734767647","https://openalex.org/W6889837751"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W3009270862"],"abstract_inverted_index":{"Automatic":[0],"captioning":[1],"systems":[2],"based":[3],"on":[4,32,75],"recurrent":[5],"neural":[6],"networks":[7],"have":[8],"been":[9],"tremendously":[10],"successful":[11],"at":[12],"providing":[13],"realistic":[14],"natural":[15,47],"language":[16,48],"captions":[17],"for":[18,27,104],"complex":[19],"and":[20,50,72,124,144,169],"varied":[21],"image":[22,34,63,100],"data.":[23],"We":[24,109],"explore":[25],"methods":[26],"adapting":[28],"existing":[29],"models":[30],"trained":[31],"large":[33,184],"caption":[35,89,106],"data":[36],"sets":[37],"to":[38,68,118,140,151,178],"a":[39,85,156,164,170,176,180],"similar":[40],"problem,":[41],"that":[42,64],"of":[43,60,87,97,102,129,135,146,159,166,183],"summarising":[44],"videos":[45],"using":[46],"descriptions":[49],"frame":[51],"selection.":[52],"These":[53],"architectures":[54],"create":[55,125],"internal":[56],"high":[57,147],"level":[58],"representations":[59],"the":[61,88,93,105,120,141,160],"input":[62],"can":[65,110],"be":[66],"used":[67],"define":[69],"probability":[70,96],"distributions":[71],"distance":[73],"metrics":[74],"these":[76],"distributions.":[77],"Specifically,":[78],"we":[79,162],"interpret":[80],"each":[81],"hidden":[82],"unit":[83],"inside":[84],"layer":[86],"model":[90],"as":[91,138,149],"representing":[92],"un-normalised":[94],"log":[95],"some":[98],"unknown":[99],"feature":[101],"interest":[103],"generation":[107],"process.":[108],"then":[111],"apply":[112],"well":[113],"understood":[114],"statistical":[115],"divergence":[116,137,148],"measures":[117],"express":[119],"difference":[121],"between":[122],"images":[123,134],"an":[126],"unsupervised":[127],"segmentation":[128],"video":[130,186],"frames,":[131],"classifying":[132],"consecutive":[133],"low":[136],"belonging":[139,150],"same":[142],"context,":[143],"those":[145],"different":[152],"contexts.":[153],"To":[154],"provide":[155,163],"final":[157],"summary":[158],"video,":[161],"group":[165],"selected":[167],"frames":[168],"text":[171],"description":[172],"accompanying":[173],"them,":[174],"allowing":[175],"user":[177],"perform":[179],"quick":[181],"exploration":[182],"unlabeled":[185],"databases.":[187]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
