{"id":"https://openalex.org/W7133360013","doi":"https://doi.org/10.48550/arxiv.2603.02172","title":"GeoDiT: Point-Conditioned Diffusion Transformer for Satellite Image Synthesis","display_name":"GeoDiT: Point-Conditioned Diffusion Transformer for Satellite Image Synthesis","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133360013","doi":"https://doi.org/10.48550/arxiv.2603.02172"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.02172","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02172","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.02172","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089378935","display_name":"Srikumar Sastry","orcid":"https://orcid.org/0000-0002-4646-9416"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sastry, Srikumar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127759794","display_name":"Dan Cher","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cher, Dan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127971727","display_name":"Brian Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Brian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108919727","display_name":"Aayush Dhakal","orcid":"https://orcid.org/0000-0003-4431-0628"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dhakal, Aayush","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128010286","display_name":"Subash Khanal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khanal, Subash","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127927612","display_name":"Dev Gupta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gupta, Dev","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5029557305","display_name":"Nathan Jacobs","orcid":"https://orcid.org/0000-0002-4242-8967"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jacobs, Nathan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3946000039577484,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3946000039577484,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07000000029802322,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.03909999877214432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/geolocation","display_name":"Geolocation","score":0.5315999984741211},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5145999789237976},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.49810001254081726},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4607999920845032},{"id":"https://openalex.org/keywords/satellite","display_name":"Satellite","score":0.4503999948501587},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.44760000705718994},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.430400013923645},{"id":"https://openalex.org/keywords/control-point","display_name":"Control point","score":0.3792000114917755}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7121000289916992},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5325999855995178},{"id":"https://openalex.org/C22041718","wikidata":"https://www.wikidata.org/wiki/Q638949","display_name":"Geolocation","level":2,"score":0.5315999984741211},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5145999789237976},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.49810001254081726},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4961000084877014},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4607999920845032},{"id":"https://openalex.org/C19269812","wikidata":"https://www.wikidata.org/wiki/Q26540","display_name":"Satellite","level":2,"score":0.4503999948501587},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.430400013923645},{"id":"https://openalex.org/C2777774050","wikidata":"https://www.wikidata.org/wiki/Q16945110","display_name":"Control point","level":2,"score":0.3792000114917755},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3337000012397766},{"id":"https://openalex.org/C184408114","wikidata":"https://www.wikidata.org/wiki/Q1502022","display_name":"Generative Design","level":3,"score":0.32030001282691956},{"id":"https://openalex.org/C2985301230","wikidata":"https://www.wikidata.org/wiki/Q725252","display_name":"Satellite image","level":3,"score":0.3165000081062317},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.28049999475479126},{"id":"https://openalex.org/C2989087649","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Image synthesis","level":3,"score":0.27059999108314514},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.25619998574256897},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.02172","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02172","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.02172","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02172","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6847036480903625}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,103],"introduce":[1,37,85],"GeoDiT,":[2,112],"a":[3,38],"diffusion":[4],"transformer":[5],"designed":[6],"for":[7,77,110,120,125],"text-to-satellite":[8],"image":[9,17,79,118],"generation":[10,46,134],"with":[11,60],"point-based":[12,40],"control.":[13],"Existing":[14],"controlled":[15],"satellite":[16,78,117],"generative":[18,141],"models":[19],"often":[20],"require":[21],"pixel-level":[22],"maps":[23],"that":[24,43,91,130],"are":[25],"time-consuming":[26],"to":[27],"acquire,":[28],"yet":[29],"semantically":[30,64],"limited.":[31],"To":[32,81],"address":[33],"this":[34,82],"limitation,":[35],"we":[36,84],"novel":[39],"conditioning":[41],"framework":[42],"controls":[44],"the":[45,49,53,56,94,99,114,137],"process":[47],"through":[48],"spatial":[50],"location":[51],"of":[52,116],"points":[54],"and":[55,73,122],"textual":[57],"description":[58],"associated":[59],"each":[61],"point,":[62],"providing":[63],"rich":[65],"control":[66],"signals.":[67],"This":[68],"approach":[69],"enables":[70],"flexible,":[71],"annotation-friendly,":[72],"computationally":[74],"simple":[75],"inference":[76],"generation.":[80],"end,":[83],"an":[86],"adaptive":[87],"local":[88],"attention":[89,95],"mechanism":[90],"effectively":[92],"regularizes":[93],"scores":[96],"based":[97],"on":[98],"input":[100],"point":[101],"queries.":[102],"systematically":[104],"evaluate":[105],"various":[106],"domain-specific":[107],"design":[108],"choices":[109],"training":[111],"including":[113],"selection":[115],"representation":[119,124],"alignment":[121],"geolocation":[123],"conditioning.":[126],"Our":[127],"experiments":[128],"demonstrate":[129],"GeoDiT":[131],"achieves":[132],"impressive":[133],"performance,":[135],"surpassing":[136],"state-of-the-art":[138],"remote":[139],"sensing":[140],"models.":[142]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-04T00:00:00"}
