{"id":"https://openalex.org/W4404088539","doi":"https://doi.org/10.48550/arxiv.2410.15312","title":"Synergistic Dual Spatial-aware Generation of Image-to-Text and Text-to-Image","display_name":"Synergistic Dual Spatial-aware Generation of Image-to-Text and Text-to-Image","publication_year":2024,"publication_date":"2024-10-20","ids":{"openalex":"https://openalex.org/W4404088539","doi":"https://doi.org/10.48550/arxiv.2410.15312"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.15312","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.15312","pdf_url":"https://arxiv.org/pdf/2410.15312","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.15312","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100701157","display_name":"Yu Zhao","orcid":"https://orcid.org/0000-0002-0606-4676"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhao, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055815455","display_name":"Hao Fei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029645676","display_name":"Xiangtai Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiangtai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029082837","display_name":"Libo Qin","orcid":"https://orcid.org/0000-0002-3619-675X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Libo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084510895","display_name":"Jiayi Ji","orcid":"https://orcid.org/0000-0002-9956-6308"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Jiayi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110463782","display_name":"Hongyuan Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Hongyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004953265","display_name":"Meishan Zhang","orcid":"https://orcid.org/0000-0001-6335-1340"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Meishan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100402911","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-3895-5510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5027428789","display_name":"Jianguo Wei","orcid":"https://orcid.org/0000-0002-8964-9759"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Jianguo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100701157"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9472000002861023,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9472000002861023,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9042999744415283,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.7065510153770447},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.6256815791130066},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5531759858131409},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5047482252120972},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42909854650497437},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.09544426202774048}],"concepts":[{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.7065510153770447},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.6256815791130066},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5531759858131409},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5047482252120972},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42909854650497437},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.09544426202774048},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.15312","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.15312","pdf_url":"https://arxiv.org/pdf/2410.15312","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.15312","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.15312","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.15312","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.15312","pdf_url":"https://arxiv.org/pdf/2410.15312","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1883487206","display_name":null,"funder_award_id":"62336008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4404088539.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"In":[0,44],"the":[1,37,50,61,69,94,97,107,114,123,127,132,138,148,157],"visual":[2,149],"spatial":[3,7,11,33,41,71,150],"understanding":[4,151],"(VSU)":[5],"area,":[6],"image-to-text":[8],"(SI2T)":[9],"and":[10,52,86,100,109,141,160],"text-to-image":[12],"(ST2I)":[13],"are":[14],"two":[15],"fundamental":[16],"tasks":[17],"that":[18,82,96,137],"appear":[19],"in":[20,32,106],"dual":[21,57,62,170],"form.":[22],"Existing":[23],"methods":[24,162],"for":[25],"standalone":[26],"SI2T":[27,51,142],"or":[28],"ST2I":[29,53,108,140],"perform":[30],"imperfectly":[31],"understanding,":[34],"due":[35],"to":[36,67,88,130],"difficulty":[38],"of":[39,126],"3D-wise":[40],"feature":[42],"modeling.":[43],"this":[45],"work,":[46],"we":[47,64,112],"consider":[48],"modeling":[49],"together":[54],"under":[55],"a":[56,75],"learning":[58,171],"framework.":[59],"During":[60],"framework,":[63,120],"then":[65],"propose":[66,113],"represent":[68],"3D":[70,77],"scene":[72,78],"features":[73,125],"with":[74],"novel":[76],"graph":[79],"(3DSG)":[80],"representation":[81],"can":[83],"be":[84],"shared":[85],"beneficial":[87],"both":[89],"tasks.":[90],"Further,":[91],"inspired":[92],"by":[93],"intuition":[95],"easier":[98],"3D$\\to$image":[99],"3D$\\to$text":[101],"processes":[102,129],"also":[103],"exist":[104],"symmetrically":[105],"SI2T,":[110],"respectively,":[111],"Spatial":[115],"Dual":[116],"Discrete":[117],"Diffusion":[118],"(SD$^3$)":[119],"which":[121],"utilizes":[122],"intermediate":[124],"3D$\\to$X":[128],"guide":[131],"hard":[133],"X$\\to$3D":[134],"processes,":[135],"such":[136],"overall":[139],"will":[143],"benefit":[144],"each":[145],"other.":[146],"On":[147],"dataset":[152],"VSD,":[153],"our":[154,169],"system":[155],"outperforms":[156],"mainstream":[158],"T2I":[159],"I2T":[161],"significantly.":[163],"Further":[164],"in-depth":[165],"analysis":[166],"reveals":[167],"how":[168],"strategy":[172],"advances.":[173]},"counts_by_year":[],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
