{"id":"https://openalex.org/W2791822325","doi":"https://doi.org/10.1145/3163080.3163108","title":"Amalgamation of Video Description and Multiple Object Localization using single Deep Learning Model","display_name":"Amalgamation of Video Description and Multiple Object Localization using single Deep Learning Model","publication_year":2017,"publication_date":"2017-11-27","ids":{"openalex":"https://openalex.org/W2791822325","doi":"https://doi.org/10.1145/3163080.3163108","mag":"2791822325"},"language":"en","primary_location":{"id":"doi:10.1145/3163080.3163108","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3163080.3163108","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Conference on Signal Processing Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032382984","display_name":"Shivam Duggal","orcid":null},"institutions":[{"id":"https://openalex.org/I863896202","display_name":"Delhi Technological University","ror":"https://ror.org/01ztcvt22","country_code":"IN","type":"education","lineage":["https://openalex.org/I863896202"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Shivam Duggal","raw_affiliation_strings":["Delhi Technological University, Shahbad Daulatpur, Delhi"],"affiliations":[{"raw_affiliation_string":"Delhi Technological University, Shahbad Daulatpur, Delhi","institution_ids":["https://openalex.org/I863896202"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015720154","display_name":"Shrey Manik","orcid":null},"institutions":[{"id":"https://openalex.org/I863896202","display_name":"Delhi Technological University","ror":"https://ror.org/01ztcvt22","country_code":"IN","type":"education","lineage":["https://openalex.org/I863896202"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shrey Manik","raw_affiliation_strings":["Delhi Technological University, Shahbad Daulatpur, Delhi"],"affiliations":[{"raw_affiliation_string":"Delhi Technological University, Shahbad Daulatpur, Delhi","institution_ids":["https://openalex.org/I863896202"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073406380","display_name":"Mohan Ghai","orcid":null},"institutions":[{"id":"https://openalex.org/I863896202","display_name":"Delhi Technological University","ror":"https://ror.org/01ztcvt22","country_code":"IN","type":"education","lineage":["https://openalex.org/I863896202"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Mohan Ghai","raw_affiliation_strings":["Delhi Technological University, Shahbad Daulatpur, Delhi"],"affiliations":[{"raw_affiliation_string":"Delhi Technological University, Shahbad Daulatpur, Delhi","institution_ids":["https://openalex.org/I863896202"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5032382984"],"corresponding_institution_ids":["https://openalex.org/I863896202"],"apc_list":null,"apc_paid":null,"fwci":0.2731,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.66267392,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"109","last_page":"115"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8649671077728271},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.785602331161499},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6483387351036072},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5690137147903442},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5113608241081238},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5006766319274902},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.485320508480072},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.476483553647995},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.4577111601829529},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4313179850578308},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4254028797149658},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3511837422847748},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3118208646774292},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2855769991874695},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11358925700187683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8649671077728271},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.785602331161499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6483387351036072},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5690137147903442},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5113608241081238},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5006766319274902},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.485320508480072},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.476483553647995},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.4577111601829529},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4313179850578308},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4254028797149658},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3511837422847748},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3118208646774292},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2855769991874695},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11358925700187683},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3163080.3163108","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3163080.3163108","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Conference on Signal Processing Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1483870316","https://openalex.org/W1536680647","https://openalex.org/W1555385401","https://openalex.org/W1572567476","https://openalex.org/W1586939924","https://openalex.org/W1686810756","https://openalex.org/W1905882502","https://openalex.org/W1947481528","https://openalex.org/W1991367009","https://openalex.org/W2016053056","https://openalex.org/W2046382188","https://openalex.org/W2078238240","https://openalex.org/W2123229215","https://openalex.org/W2139501017","https://openalex.org/W2142900973","https://openalex.org/W2152984213","https://openalex.org/W2156303437","https://openalex.org/W2164290393","https://openalex.org/W2170135819","https://openalex.org/W2177847924","https://openalex.org/W2183182206","https://openalex.org/W2251353663","https://openalex.org/W2741609678","https://openalex.org/W2951159095","https://openalex.org/W2951912364","https://openalex.org/W2953106684","https://openalex.org/W2964241990","https://openalex.org/W6600258949","https://openalex.org/W6600828087"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393"],"abstract_inverted_index":{"Self-describing":[0],"the":[1,62,71,95,115,122,128,149,153,165,171],"content":[2],"of":[3,64,70,92,151,170],"a":[4,26,40],"video":[5,33],"is":[6,112,127,136],"an":[7],"elementary":[8],"problem":[9],"in":[10,68],"artificial":[11],"intelligence":[12],"that":[13],"joins":[14],"computer":[15],"vision":[16],"and":[17,37,43,97],"natural":[18],"language":[19],"processing.":[20],"Through":[21],"this":[22],"paper,":[23],"we":[24],"propose":[25],"single":[27,47],"system":[28,48],"which":[29,158],"could":[30],"carry":[31],"out":[32],"analysis":[34],"(Object":[35],"Detection":[36],"Captioning)":[38],"at":[39],"reduced":[41],"time":[42],"memory":[44],"complexity.":[45],"This":[46],"uses":[49,83,159],"YOLO":[50],"(You":[51],"Look":[52],"Only":[53],"Once)":[54],"as":[55,114],"its":[56],"base":[57,116,129],"model.":[58],"Moreover,":[59],"to":[60,88,99,120,147,163],"highlight":[61],"importance":[63],"using":[65],"transfer":[66,160],"learning":[67,161],"development":[69,169],"proposed":[72,154,172],"system,":[73],"two":[74,84,123],"more":[75],"approaches":[76],"have":[77],"been":[78],"discussed.":[79],"The":[80,133,142],"rest":[81],"one":[82,87],"discrete":[85],"models,":[86],"extract":[89],"continuous":[90],"bag":[91],"words":[93,104],"from":[94,102],"frames":[96],"other":[98],"generate":[100],"captions":[101],"those":[103],"i.e.":[105],"Language":[106,130],"Model.":[107],"VGG-16":[108],"(Visual":[109],"Geometry":[110],"Group)":[111],"used":[113,135],"image":[117],"decoder":[118],"model":[119],"compare":[121],"approaches,":[124],"while":[125],"LSTM":[126],"Model":[131],"used.":[132],"Dataset":[134],"Microsoft":[137],"Research":[138],"Video":[139],"Description":[140],"Corpus.":[141],"dataset":[143],"was":[144],"manually":[145],"modified":[146],"serve":[148],"purpose":[150],"training":[152],"system.":[155,173],"Second":[156],"approach":[157,167],"proves":[162],"be":[164],"better":[166],"for":[168]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
