{"id":"https://openalex.org/W4283075937","doi":"https://doi.org/10.1109/wacvw58289.2023.00042","title":"MixGen: A New Multi-Modal Data Augmentation","display_name":"MixGen: A New Multi-Modal Data Augmentation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4283075937","doi":"https://doi.org/10.1109/wacvw58289.2023.00042"},"language":"en","primary_location":{"id":"doi:10.1109/wacvw58289.2023.00042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacvw58289.2023.00042","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Winter Conference on Applications of Computer Vision Workshops (WACVW)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016760086","display_name":"Xiaoshuai Hao","orcid":"https://orcid.org/0009-0007-4209-6695"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaoshuai Hao","raw_affiliation_strings":["Institute of Information Engineering, CAS"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, CAS","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100534674","display_name":"Yi Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yi Zhu","raw_affiliation_strings":["Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003062075","display_name":"Srikar Appalaraju","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srikar Appalaraju","raw_affiliation_strings":["Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053262611","display_name":"Aston Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aston Zhang","raw_affiliation_strings":["Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017704875","display_name":"Wanqian Zhang","orcid":"https://orcid.org/0000-0001-5734-4072"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanqian Zhang","raw_affiliation_strings":["Institute of Information Engineering, CAS"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, CAS","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374360","display_name":"Bo Li","orcid":"https://orcid.org/0000-0001-6709-0942"},"institutions":[{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Li","raw_affiliation_strings":["Institute of Information Engineering, CAS"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, CAS","institution_ids":["https://openalex.org/I4210156404"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100399466","display_name":"Mu Li","orcid":"https://orcid.org/0000-0002-7327-3304"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mu Li","raw_affiliation_strings":["Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5016760086"],"corresponding_institution_ids":["https://openalex.org/I4210156404"],"apc_list":null,"apc_paid":null,"fwci":11.2383,"has_fulltext":false,"cited_by_count":92,"citation_normalized_percentile":{"value":0.99009694,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"379","last_page":"389"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7735095024108887},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7250368595123291},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5457961559295654},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.543552577495575},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5087376236915588},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.47049498558044434},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4541085362434387},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.45162904262542725},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.4338359832763672},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4162355363368988},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4121095836162567},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3626810610294342}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7735095024108887},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7250368595123291},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5457961559295654},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.543552577495575},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5087376236915588},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.47049498558044434},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4541085362434387},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.45162904262542725},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.4338359832763672},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4162355363368988},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4121095836162567},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3626810610294342},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacvw58289.2023.00042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacvw58289.2023.00042","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Winter Conference on Applications of Computer Vision Workshops (WACVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4000000059604645,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":101,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2109586012","https://openalex.org/W2170240176","https://openalex.org/W2194775991","https://openalex.org/W2251658415","https://openalex.org/W2277195237","https://openalex.org/W2489434015","https://openalex.org/W2560730294","https://openalex.org/W2746314669","https://openalex.org/W2763421725","https://openalex.org/W2765407302","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2912371042","https://openalex.org/W2946411231","https://openalex.org/W2949736877","https://openalex.org/W2962369866","https://openalex.org/W2963109634","https://openalex.org/W2963530300","https://openalex.org/W2964187781","https://openalex.org/W2966645965","https://openalex.org/W2966715458","https://openalex.org/W2968124245","https://openalex.org/W2969876226","https://openalex.org/W2970231061","https://openalex.org/W2971296908","https://openalex.org/W2987401211","https://openalex.org/W2991266149","https://openalex.org/W2992308087","https://openalex.org/W2998508940","https://openalex.org/W3001555892","https://openalex.org/W3033721867","https://openalex.org/W3035682985","https://openalex.org/W3035688398","https://openalex.org/W3086731747","https://openalex.org/W3089044119","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3103542727","https://openalex.org/W3105966348","https://openalex.org/W3108512475","https://openalex.org/W3126337491","https://openalex.org/W3128868613","https://openalex.org/W3152798676","https://openalex.org/W3159619744","https://openalex.org/W3166396011","https://openalex.org/W3167118264","https://openalex.org/W3176641147","https://openalex.org/W3176664887","https://openalex.org/W3176824248","https://openalex.org/W3177224328","https://openalex.org/W3184735396","https://openalex.org/W3184784418","https://openalex.org/W3188030217","https://openalex.org/W3193402170","https://openalex.org/W3208314443","https://openalex.org/W3209532394","https://openalex.org/W4212915707","https://openalex.org/W4224035735","https://openalex.org/W4226182655","https://openalex.org/W4229042118","https://openalex.org/W4287727780","https://openalex.org/W4312877428","https://openalex.org/W4312956471","https://openalex.org/W4312980231","https://openalex.org/W6676497082","https://openalex.org/W6685053522","https://openalex.org/W6691212626","https://openalex.org/W6743428213","https://openalex.org/W6745136726","https://openalex.org/W6745245109","https://openalex.org/W6752474537","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6758704467","https://openalex.org/W6763165154","https://openalex.org/W6763882748","https://openalex.org/W6765939562","https://openalex.org/W6766904570","https://openalex.org/W6767211374","https://openalex.org/W6767279747","https://openalex.org/W6768086466","https://openalex.org/W6773248631","https://openalex.org/W6778963470","https://openalex.org/W6779447676","https://openalex.org/W6779473860","https://openalex.org/W6780385599","https://openalex.org/W6786030337","https://openalex.org/W6789909235","https://openalex.org/W6790019176","https://openalex.org/W6790953777","https://openalex.org/W6791353385","https://openalex.org/W6793759846","https://openalex.org/W6798805250","https://openalex.org/W6800139874","https://openalex.org/W6802987763","https://openalex.org/W6803316292","https://openalex.org/W6803872405","https://openalex.org/W6809885388","https://openalex.org/W6811013733","https://openalex.org/W6811072154"],"related_works":["https://openalex.org/W3157284875","https://openalex.org/W2147241511","https://openalex.org/W2259406085","https://openalex.org/W2099715052","https://openalex.org/W4226247999","https://openalex.org/W3090872036","https://openalex.org/W3209772662","https://openalex.org/W4200629926","https://openalex.org/W4220955952","https://openalex.org/W4287868219"],"abstract_inverted_index":{"Data":[0],"augmentation":[1,37],"is":[2,16],"a":[3,34],"necessity":[4],"to":[5,42,88,102],"enhance":[6],"data":[7,15,36,45],"efficiency":[8],"in":[9,25,98],"deep":[10],"learning.":[11],"For":[12,94],"vision-language":[13,39,86],"pre-training,":[14],"only":[17],"augmented":[18],"either":[19],"for":[20,23,38],"images":[21,58],"or":[22],"text":[24],"previous":[26],"works.":[27],"In":[28],"this":[29],"paper,":[30],"we":[31],"present":[32],"MixGen:":[33],"joint":[35],"representation":[40],"learning":[41],"further":[43],"improve":[44],"efficiency.":[46],"It":[47],"generates":[48],"new":[49],"image-text":[50,109],"pairs":[51],"with":[52],"semantic":[53],"relationships":[54],"preserved":[55],"by":[56],"interpolating":[57],"and":[59,64,81,92,115,140],"concatenating":[60],"text.":[61],"It's":[62],"simple,":[63],"can":[65],"be":[66],"plug-and-played":[67],"into":[68],"existing":[69],"pipelines.":[70],"We":[71],"evaluate":[72],"MixGen":[73,97],"on":[74,106,112,117,123,128,138,144],"four":[75],"architectures,":[76],"including":[77],"CLIP,":[78],"ViLT,":[79],"ALBEF":[80,99],"TCL,":[82],"across":[83],"five":[84],"downstream":[85,107],"tasks":[87],"show":[89],"its":[90],"versatility":[91],"effectiveness.":[93],"example,":[95],"adding":[96],"pre-training":[100],"leads":[101],"absolute":[103],"performance":[104],"improvements":[105],"tasks:":[108],"retrieval":[110],"(+6.2%":[111],"COCO":[113],"fine-tuned":[114],"+5.3%":[116],"Flicker30K":[118],"zero-shot),":[119],"visual":[120,125,134,141],"grounding":[121],"(+0.9%":[122,127],"Re-fCOCO+),":[124],"reasoning":[126],"NLVR":[129],"<sup":[130],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[131],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[132],"),":[133],"question":[135],"answering":[136],"(+0.3%":[137],"VQA2.0),":[139],"entail-ment":[142],"(+0.4%":[143],"SNLI-VE).":[145]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":32},{"year":2024,"cited_by_count":35},{"year":2023,"cited_by_count":21}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
