{"id":"https://openalex.org/W2525981529","doi":"https://doi.org/10.1145/2964284.2973831","title":"Image2Text","display_name":"Image2Text","publication_year":2016,"publication_date":"2016-09-29","ids":{"openalex":"https://openalex.org/W2525981529","doi":"https://doi.org/10.1145/2964284.2973831","mag":"2525981529"},"language":"en","primary_location":{"id":"doi:10.1145/2964284.2973831","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2964284.2973831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM international conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100353141","display_name":"Chang Liu","orcid":"https://orcid.org/0000-0001-5560-7203"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chang Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055085354","display_name":"Changhu Wang","orcid":"https://orcid.org/0000-0001-8373-2597"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changhu Wang","raw_affiliation_strings":["Multimedia Search and Mining/Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Multimedia Search and Mining/Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055546056","display_name":"Fuchun Sun","orcid":"https://orcid.org/0000-0003-3546-6305"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuchun Sun","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100728762","display_name":"Yong Rui","orcid":"https://orcid.org/0000-0002-9142-5914"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Rui","raw_affiliation_strings":["Multimedia Search and Mining/Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Multimedia Search and Mining/Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100353141"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.002,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.83353752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"746","last_page":"748"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9702777862548828},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8705819845199585},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6631261110305786},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.64220130443573},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5974411368370056},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.568656325340271},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5668593645095825},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5573347806930542},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5278465747833252},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4668228030204773},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.45287665724754333},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.44126221537590027},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4324776530265808},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.4213504493236542},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3882591724395752},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3159913420677185}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9702777862548828},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8705819845199585},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6631261110305786},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.64220130443573},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5974411368370056},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.568656325340271},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5668593645095825},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5573347806930542},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5278465747833252},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4668228030204773},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.45287665724754333},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.44126221537590027},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4324776530265808},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.4213504493236542},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3882591724395752},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3159913420677185},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2964284.2973831","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2964284.2973831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM international conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1947481528","https://openalex.org/W2064675550","https://openalex.org/W2220981600","https://openalex.org/W2962706528"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W3009270862"],"abstract_inverted_index":{"In":[0],"this":[1],"work,":[2],"we":[3,41,72,100],"showcase":[4],"the":[5,27,60,76,88,92,97],"Image2Text":[6],"system,":[7],"which":[8],"is":[9,63],"a":[10,33,43,66,80,102,132],"real-time":[11],"captioning":[12,31,98],"system":[13,104,113],"that":[14],"can":[15],"generate":[16],"human-level":[17,108],"natural":[18],"language":[19],"description":[20],"for":[21,50,110],"any":[22],"input":[23,77],"image.":[24],"We":[25],"formulate":[26],"problem":[28],"of":[29,82,91],"image":[30,51,62,78],"as":[32,79,87],"multimodal":[34],"translation":[35],"task.":[36],"Analogous":[37],"to":[38,74,85,105,117],"machine":[39],"translation,":[40],"present":[42],"sequence-to-sequence":[44],"recurrent":[45],"neural":[46,68],"networks":[47,69],"(RNN)":[48],"model":[49],"caption":[52],"generation.":[53],"Different":[54],"from":[55,131],"most":[56],"existing":[57],"work":[58],"where":[59],"whole":[61],"represented":[64],"by":[65],"convolutional":[67],"(CNN)":[70],"feature,":[71],"propose":[73],"represent":[75],"sequence":[81,90],"detected":[83],"objects":[84,120],"serve":[86],"source":[89],"RNN":[93],"model.":[94],"Based":[95],"on":[96],"framework,":[99],"develop":[101],"user-friendly":[103],"automatically":[106],"generated":[107],"captions":[109],"users.":[111],"The":[112],"also":[114],"enables":[115],"users":[116],"detect":[118],"salient":[119],"in":[121],"an":[122],"image,":[123],"and":[124,128],"retrieve":[125],"similar":[126],"images":[127],"corresponding":[129],"descriptions":[130],"database.":[133]},"counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-10-07T00:00:00"}
