{"id":"https://openalex.org/W4392538956","doi":"https://doi.org/10.48550/arxiv.2403.03194","title":"MAGID: An Automated Pipeline for Generating Synthetic Multi-modal Datasets","display_name":"MAGID: An Automated Pipeline for Generating Synthetic Multi-modal Datasets","publication_year":2024,"publication_date":"2024-03-05","ids":{"openalex":"https://openalex.org/W4392538956","doi":"https://doi.org/10.48550/arxiv.2403.03194"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.03194","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03194","pdf_url":"https://arxiv.org/pdf/2403.03194","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.03194","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076661689","display_name":"Hossein Aboutalebi","orcid":"https://orcid.org/0000-0003-4396-3993"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aboutalebi, Hossein","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033909285","display_name":"Hwanjun Song","orcid":"https://orcid.org/0000-0002-1105-0818"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Hwanjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058457237","display_name":"Yusheng Xie","orcid":"https://orcid.org/0000-0002-8581-4614"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Yusheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112464510","display_name":"Arshit Gupta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gupta, Arshit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020079535","display_name":"Justin Sun","orcid":"https://orcid.org/0000-0002-1879-1598"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Justin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100341891","display_name":"Hang Su","orcid":"https://orcid.org/0000-0002-6877-6783"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Hang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011132902","display_name":"Igor Shalyminov","orcid":"https://orcid.org/0000-0001-9664-1774"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shalyminov, Igor","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084740578","display_name":"\u039d\u03b9\u03ba\u03cc\u03bb\u03b1\u03bf\u03c2 \u03a0\u03b1\u03c0\u03c0\u03ac\u03c2","orcid":"https://orcid.org/0000-0003-4416-7702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pappas, Nikolaos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060500303","display_name":"Siffi Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Siffi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070002963","display_name":"Saab Mansour","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mansour, Saab","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9520000219345093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9520000219345093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9243999719619751,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9068999886512756,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.772892415523529},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7709767818450928},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6459317207336426},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37838396430015564},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3458699584007263},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14650732278823853},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.05011644959449768}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.772892415523529},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7709767818450928},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6459317207336426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37838396430015564},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3458699584007263},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14650732278823853},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.05011644959449768},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.03194","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03194","pdf_url":"https://arxiv.org/pdf/2403.03194","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2403.03194","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.03194","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.03194","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.03194","pdf_url":"https://arxiv.org/pdf/2403.03194","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392538956.pdf","grobid_xml":"https://content.openalex.org/works/W4392538956.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"Development":[0],"of":[1,10],"multimodal":[2,12],"interactive":[3],"systems":[4],"is":[5,18,65,133,154],"hindered":[6],"by":[7],"the":[8,74,151],"lack":[9],"rich,":[11],"(text,":[13],"images)":[14],"conversational":[15],"data,":[16],"which":[17],"needed":[19],"in":[20,104,143],"large":[21],"quantities":[22],"for":[23],"LLMs.":[24],"Previous":[25],"approaches":[26],"augment":[27,53],"textual":[28],"dialogues":[29,55],"with":[30,56,73,140],"retrieved":[31],"images,":[32,70],"posing":[33],"privacy,":[34],"diversity,":[35],"and":[36,58,92,100,109,125],"quality":[37,94],"constraints.":[38],"In":[39],"this":[40],"work,":[41],"we":[42],"introduce":[43],"Multimodal":[44],"Augmented":[45],"Generative":[46],"Images":[47],"Dialogues":[48],"(MAGID),":[49],"a":[50,62],"framework":[51],"to":[52,67,106,115,135],"text-only":[54],"diverse":[57],"high-quality":[59,108],"images.":[60],"Subsequently,":[61],"diffusion":[63],"model":[64],"applied":[66],"craft":[68],"corresponding":[69],"ensuring":[71],"alignment":[72],"identified":[75],"text.":[76],"Finally,":[77],"MAGID":[78,114,132],"incorporates":[79],"an":[80,85],"innovative":[81],"feedback":[82],"loop":[83],"between":[84],"image":[86,93,152],"description":[87],"generation":[88],"module":[89],"(textual":[90],"LLM)":[91],"modules":[95],"(addressing":[96],"aesthetics,":[97],"image-text":[98],"matching,":[99],"safety),":[101],"that":[102,131],"work":[103],"tandem":[105],"generate":[107],"multi-modal":[110],"dialogues.":[111],"We":[112],"compare":[113],"other":[116],"SOTA":[117],"baselines":[118,149],"on":[119],"three":[120],"dialogue":[121],"datasets,":[122],"using":[123],"automated":[124],"human":[126,144],"evaluation.":[127],"Our":[128],"results":[129],"show":[130],"comparable":[134],"or":[136],"better":[137],"than":[138],"baselines,":[139],"significant":[141],"improvements":[142],"evaluation,":[145],"especially":[146],"against":[147],"retrieval":[148],"where":[150],"database":[153],"small.":[155]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
