{"id":"https://openalex.org/W2797733588","doi":"https://doi.org/10.1145/3240508.3240640","title":"Decoupled Novel Object Captioner","display_name":"Decoupled Novel Object Captioner","publication_year":2018,"publication_date":"2018-10-15","ids":{"openalex":"https://openalex.org/W2797733588","doi":"https://doi.org/10.1145/3240508.3240640","mag":"2797733588"},"language":"en","primary_location":{"id":"doi:10.1145/3240508.3240640","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3240508.3240640","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240640","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240640","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100324098","display_name":"Yu Wu","orcid":"https://orcid.org/0000-0002-1680-8253"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Yu Wu","raw_affiliation_strings":["University of Technology Sydney, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043617790","display_name":"Linchao Zhu","orcid":"https://orcid.org/0000-0002-4093-7557"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Linchao Zhu","raw_affiliation_strings":["University of Technology Sydney, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090730336","display_name":"Lu Jiang","orcid":"https://orcid.org/0000-0003-0286-8439"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lu Jiang","raw_affiliation_strings":["Google Inc., San Francisco, USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., San Francisco, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005421447","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0002-0512-880X"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["University of Technology Sydney &amp; Chinese Academy of Sciences, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology Sydney &amp; Chinese Academy of Sciences, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100324098"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":null,"apc_paid":null,"fwci":5.5288,"has_fulltext":true,"cited_by_count":70,"citation_normalized_percentile":{"value":0.96948977,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1029","last_page":"1037"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9434901475906372},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.8298448324203491},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8184321522712708},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7292592525482178},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6679601669311523},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6375917792320251},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6105163097381592},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5461309552192688},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5451937317848206},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.44513189792633057},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38010460138320923},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1090235710144043}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9434901475906372},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.8298448324203491},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8184321522712708},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7292592525482178},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6679601669311523},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6375917792320251},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6105163097381592},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5461309552192688},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5451937317848206},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.44513189792633057},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38010460138320923},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1090235710144043},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3240508.3240640","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3240508.3240640","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240640","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1804.03803","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1804.03803","pdf_url":"https://arxiv.org/pdf/1804.03803","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:opus.lib.uts.edu.au:10453/131493","is_oa":false,"landing_page_url":"http://hdl.handle.net/10453/131493","pdf_url":null,"source":{"id":"https://openalex.org/S4306401357","display_name":"UTS ePRESS (University of Technology Sydney)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114017466","host_organization_name":"University of Technology Sydney","host_organization_lineage":["https://openalex.org/I114017466"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Proceeding"}],"best_oa_location":{"id":"doi:10.1145/3240508.3240640","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3240508.3240640","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240640","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.7099999785423279,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320335741","display_name":"Data to Decisions Cooperative Research Centres","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2797733588.pdf","grobid_xml":"https://content.openalex.org/works/W2797733588.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W8316075","https://openalex.org/W648786980","https://openalex.org/W1514535095","https://openalex.org/W1686810756","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1897761818","https://openalex.org/W1905882502","https://openalex.org/W1947481528","https://openalex.org/W1969616664","https://openalex.org/W1999970330","https://openalex.org/W2028502081","https://openalex.org/W2064675550","https://openalex.org/W2077071968","https://openalex.org/W2102381086","https://openalex.org/W2109586012","https://openalex.org/W2123301721","https://openalex.org/W2128532956","https://openalex.org/W2139380585","https://openalex.org/W2139501017","https://openalex.org/W2171361956","https://openalex.org/W2173180041","https://openalex.org/W2274287116","https://openalex.org/W2399033357","https://openalex.org/W2432717477","https://openalex.org/W2463955103","https://openalex.org/W2557728737","https://openalex.org/W2735159761","https://openalex.org/W2753232960","https://openalex.org/W2897870704","https://openalex.org/W2950178297","https://openalex.org/W2950304420","https://openalex.org/W2951183276","https://openalex.org/W2951775809","https://openalex.org/W2952342379","https://openalex.org/W2953022248","https://openalex.org/W2953106684","https://openalex.org/W2953158660","https://openalex.org/W2953384591","https://openalex.org/W2962706528","https://openalex.org/W2962835968","https://openalex.org/W2963088515","https://openalex.org/W2963175879","https://openalex.org/W2963248296","https://openalex.org/W2963499153","https://openalex.org/W2963758027","https://openalex.org/W2963877622","https://openalex.org/W2964121744","https://openalex.org/W2964350391","https://openalex.org/W4241542682","https://openalex.org/W6713134421"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W3217195652"],"abstract_inverted_index":{"Image":[0],"captioning":[1,32,103],"is":[2,83,209],"a":[3,20,30,39,125,157,231],"challenging":[4,121],"task":[5,104],"where":[6,105],"the":[7,50,64,74,79,99,106,115,120,136,141,153,169,176,187,193,197,207,213,216,226,241,246],"machine":[8,107],"automatically":[9],"describes":[10],"an":[11,164],"image":[12],"by":[13,76],"sentences":[14,77,113],"or":[15],"phrases.":[16],"It":[17,82],"often":[18],"requires":[19,68],"large":[21],"number":[22],"of":[23,248],"paired":[24],"image-sentence":[25],"annotations":[26],"for":[27,200],"training.":[28,60],"However,":[29],"pre-trained":[31],"model":[33,59,139,171],"can":[34,133,172],"hardly":[35],"be":[36,173,223],"applied":[37],"to":[38,72,211],"new":[40],"domain":[41],"in":[42,91,230,250],"which":[43],"some":[44],"novel":[45,65,80,101,116,166,177,234,252],"object":[46,102,142,178,183,217,235],"categories":[47],"exist,":[48],"i.e.,":[49],"objects":[51],"and":[52,86,196],"their":[53],"description":[54],"words":[55,214],"are":[56],"unseen":[57,165],"during":[58],"To":[61,118],"correctly":[62],"caption":[63,232],"object,":[66],"it":[67],"professional":[69],"human":[70],"workers":[71],"annotate":[73],"images":[75],"with":[78,152,225,233],"words.":[81],"labor":[84],"expensive":[85],"thus":[87],"limits":[88],"its":[89],"usage":[90],"real-world":[92],"applications.":[93],"In":[94],"this":[95],"paper,":[96],"we":[97,123],"introduce":[98],"zero-shot":[100],"generates":[108,156],"descriptions":[109],"without":[110],"extra":[111],"training":[112],"about":[114],"object.":[117,167,202],"tackle":[119],"problem,":[122],"propose":[124],"Decoupled":[126],"Novel":[127],"Object":[128],"Captioner":[129],"(DNOC)":[130],"framework":[131],"that":[132],"fully":[134],"decouple":[135],"language":[137],"sequence":[138,170],"from":[140,175,206,215],"descriptions.":[143,179,236],"DNOC":[144,249],"has":[145],"two":[146],"components.":[147],"1)":[148],"A":[149,181,203],"Sequence":[150],"Model":[151],"Placeholder":[154],"(SM-P)":[155],"sentence":[158],"containing":[159],"placeholders.":[160],"The":[161,219,237],"placeholder":[162,220],"represents":[163],"Thus,":[168],"decoupled":[174],"2)":[180],"key-value":[182],"memory":[184],"built":[185],"upon":[186],"freely":[188],"available":[189],"detection":[190],"model,":[191],"contains":[192],"visual":[194],"information":[195],"corresponding":[198],"word":[199],"each":[201],"query":[204],"generated":[205],"SM-P":[208],"used":[210],"retrieve":[212],"memory.":[218],"will":[221],"further":[222],"filled":[224],"correct":[227],"word,":[228],"resulting":[229],"experimental":[238],"results":[239],"on":[240],"held-out":[242],"MSCOCO":[243],"dataset":[244],"demonstrate":[245],"ability":[247],"describing":[251],"concepts.":[253]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":20},{"year":2020,"cited_by_count":15},{"year":2019,"cited_by_count":14},{"year":2018,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
