{"id":"https://openalex.org/W3182937942","doi":"https://doi.org/10.1145/3404835.3462838","title":"GilBERT: Generative Vision-Language Pre-Training for Image-Text Retrieval","display_name":"GilBERT: Generative Vision-Language Pre-Training for Image-Text Retrieval","publication_year":2021,"publication_date":"2021-07-11","ids":{"openalex":"https://openalex.org/W3182937942","doi":"https://doi.org/10.1145/3404835.3462838","mag":"3182937942"},"language":"en","primary_location":{"id":"doi:10.1145/3404835.3462838","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3404835.3462838","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3404835.3462838","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3404835.3462838","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054872468","display_name":"Weixiang Hong","orcid":"https://orcid.org/0000-0002-3794-3972"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Weixiang Hong","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000713513","display_name":"Kaixiang Ji","orcid":"https://orcid.org/0000-0002-4669-8622"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaixiang Ji","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108050448","display_name":"Jiajia Liu","orcid":"https://orcid.org/0000-0003-4273-8866"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiajia Liu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370514","display_name":"Jian Wang","orcid":"https://orcid.org/0000-0002-9292-0810"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Wang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056129529","display_name":"Jingdong Chen","orcid":"https://orcid.org/0000-0003-0083-9247"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingdong Chen","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034946345","display_name":"Wei-Ta Chu","orcid":"https://orcid.org/0000-0001-5722-7239"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Chu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5054872468"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.5938,"has_fulltext":true,"cited_by_count":36,"citation_normalized_percentile":{"value":0.91470588,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1379","last_page":"1388"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7688640356063843},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.7338020205497742},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5919095873832703},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5898208618164062},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5724126100540161},{"id":"https://openalex.org/keywords/visual-word","display_name":"Visual Word","score":0.5587754249572754},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5141925811767578},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.49993038177490234},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.49307510256767273},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.47478124499320984},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4571824073791504},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.45531946420669556},{"id":"https://openalex.org/keywords/automatic-image-annotation","display_name":"Automatic image annotation","score":0.436184823513031},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3930412232875824},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10828733444213867}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7688640356063843},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.7338020205497742},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5919095873832703},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5898208618164062},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5724126100540161},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.5587754249572754},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5141925811767578},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.49993038177490234},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.49307510256767273},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.47478124499320984},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4571824073791504},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.45531946420669556},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.436184823513031},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3930412232875824},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10828733444213867},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3404835.3462838","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3404835.3462838","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3404835.3462838","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3404835.3462838","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3404835.3462838","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3404835.3462838","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7400000095367432,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3182937942.pdf","grobid_xml":"https://content.openalex.org/works/W3182937942.grobid-xml"},"referenced_works_count":72,"referenced_works":["https://openalex.org/W1485009520","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W2062955551","https://openalex.org/W2100398441","https://openalex.org/W2108598243","https://openalex.org/W2109586012","https://openalex.org/W2125389028","https://openalex.org/W2171572695","https://openalex.org/W2173183968","https://openalex.org/W2185175083","https://openalex.org/W2277195237","https://openalex.org/W2560609797","https://openalex.org/W2560730294","https://openalex.org/W2564591810","https://openalex.org/W2604390606","https://openalex.org/W2607151106","https://openalex.org/W2626778328","https://openalex.org/W2737244074","https://openalex.org/W2754927243","https://openalex.org/W2755721434","https://openalex.org/W2777295410","https://openalex.org/W2788444646","https://openalex.org/W2789016945","https://openalex.org/W2798964604","https://openalex.org/W2808298057","https://openalex.org/W2886641317","https://openalex.org/W2889119508","https://openalex.org/W2899335602","https://openalex.org/W2899505139","https://openalex.org/W2904565150","https://openalex.org/W2949178656","https://openalex.org/W2949999304","https://openalex.org/W2953106684","https://openalex.org/W2953118818","https://openalex.org/W2954203194","https://openalex.org/W2962964995","https://openalex.org/W2963341956","https://openalex.org/W2963467339","https://openalex.org/W2963518342","https://openalex.org/W2964018924","https://openalex.org/W2964024144","https://openalex.org/W2964216930","https://openalex.org/W2965289598","https://openalex.org/W2968124245","https://openalex.org/W2969876226","https://openalex.org/W2970231061","https://openalex.org/W2970608575","https://openalex.org/W2970869018","https://openalex.org/W2975501350","https://openalex.org/W2981851019","https://openalex.org/W2986670728","https://openalex.org/W2988803389","https://openalex.org/W2988823324","https://openalex.org/W2994818707","https://openalex.org/W2997591391","https://openalex.org/W3018658244","https://openalex.org/W3025709990","https://openalex.org/W3034727271","https://openalex.org/W3035500781","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3094597186","https://openalex.org/W3104279398","https://openalex.org/W3154430790","https://openalex.org/W6600018615","https://openalex.org/W6600134738","https://openalex.org/W6600553734","https://openalex.org/W6600617704","https://openalex.org/W6601223237","https://openalex.org/W6608993855"],"related_works":["https://openalex.org/W2123147980","https://openalex.org/W1539573266","https://openalex.org/W4246725171","https://openalex.org/W2184604223","https://openalex.org/W2167764573","https://openalex.org/W193217018","https://openalex.org/W2184598298","https://openalex.org/W4287330208","https://openalex.org/W3201919367","https://openalex.org/W3150772620"],"abstract_inverted_index":{"Given":[0],"a":[1,38,94,106],"text/image":[2,161],"query,":[3],"image-text":[4,25,42,58,71,79,120,170,191],"retrieval":[5,26,80,139],"aims":[6],"to":[7,52,78,114,167,179],"find":[8],"the":[9,13,53,61,73,124,133,151,169,186],"relevant":[10],"items":[11],"in":[12,57,82,190,193],"database.":[14],"Recently,":[15],"visual-linguistic":[16,29,108],"pre-training":[17,109],"(VLP)":[18],"methods":[19,33,88],"have":[20],"demonstrated":[21],"promising":[22],"accuracy":[23],"on":[24,37,46],"and":[27,97,122,147,198],"other":[28],"tasks.":[30,49],"These":[31],"VLP":[32,77,87],"are":[34],"typically":[35],"pre-trained":[36],"large":[39],"amount":[40],"of":[41,76,119,188,195],"pairs,":[43],"then":[44],"fine-tuned":[45],"various":[47],"downstream":[48],"Nevertheless,":[50],"due":[51],"natural":[54],"modality":[55,126],"incompleteness":[56],"retrieval,":[59,192],"i.e.,":[60],"query":[62,96,146],"is":[63],"either":[64],"image":[65],"or":[66],"text":[67],"rather":[68],"than":[69],"an":[70],"pair,":[72],"naive":[74],"application":[75],"results":[81],"significant":[83],"inefficiency.":[84],"Moreover,":[85,150],"existing":[86],"cannot":[89],"extract":[90],"comparable":[91],"representations":[92,118],"for":[93,127,145],"single-modal":[95],"multi-modal":[98],"database":[99,148],"items.":[100,149],"In":[101,130],"this":[102],"work,":[103],"we":[104],"propose":[105],"generative":[107,152],"approach,":[110],"termed":[111],"as":[112],"GilBERT,":[113],"simultaneously":[115],"learn":[116],"generic":[117],"data":[121],"complete":[123],"missing":[125],"incomplete":[128],"pairs.":[129],"testing":[131],"phase,":[132],"proposed":[134],"GilBERT":[135,157,166,189],"facilitates":[136],"efficient":[137],"vector-based":[138],"by":[140],"providing":[141],"unified":[142],"feature":[143],"embedding":[144],"training":[153],"not":[154],"only":[155],"makes":[156],"compatible":[158],"with":[159],"non-parallel":[160],"corpus,":[162],"but":[163],"also":[164],"enables":[165],"model":[168],"relationships":[171],"without":[172],"suffering":[173],"massive":[174],"randomly-sampled":[175],"negative":[176],"samples,":[177],"leading":[178],"superior":[180],"experimental":[181],"performances.":[182],"Extensive":[183],"experiments":[184],"demonstrate":[185],"advantages":[187],"terms":[194],"both":[196],"efficiency":[197],"accuracy.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
