{"id":"https://openalex.org/W7137897959","doi":"https://doi.org/10.1609/aaai.v40i11.37827","title":"CART: Compositional AutoRegressive Transformer for Image Generation","display_name":"CART: Compositional AutoRegressive Transformer for Image Generation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137897959","doi":"https://doi.org/10.1609/aaai.v40i11.37827"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i11.37827","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37827","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i11.37827","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000014025","display_name":"Siddharth Roheda","orcid":"https://orcid.org/0000-0002-6195-8517"},"institutions":[{"id":"https://openalex.org/I4210101778","display_name":"Samsung (United States)","ror":"https://ror.org/01bfbvm65","country_code":"US","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Siddharth Roheda","raw_affiliation_strings":["Samsung Research Institute"],"affiliations":[{"raw_affiliation_string":"Samsung Research Institute","institution_ids":["https://openalex.org/I4210101778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129734780","display_name":"Rohit Chowdhury","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101778","display_name":"Samsung (United States)","ror":"https://ror.org/01bfbvm65","country_code":"US","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rohit Chowdhury","raw_affiliation_strings":["Samsung Research Institute"],"affiliations":[{"raw_affiliation_string":"Samsung Research Institute","institution_ids":["https://openalex.org/I4210101778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125712797","display_name":"Aniruddha Bala","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101778","display_name":"Samsung (United States)","ror":"https://ror.org/01bfbvm65","country_code":"US","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aniruddha Bala","raw_affiliation_strings":["Samsung Research Institute"],"affiliations":[{"raw_affiliation_string":"Samsung Research Institute","institution_ids":["https://openalex.org/I4210101778"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129678060","display_name":"Rohan Jaiswal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101778","display_name":"Samsung (United States)","ror":"https://ror.org/01bfbvm65","country_code":"US","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rohan Jaiswal","raw_affiliation_strings":["Samsung Research Institute"],"affiliations":[{"raw_affiliation_string":"Samsung Research Institute","institution_ids":["https://openalex.org/I4210101778"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5000014025"],"corresponding_institution_ids":["https://openalex.org/I4210101778"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13134328,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"11","first_page":"8740","last_page":"8750"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.01080000028014183,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.005200000014156103,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/specularity","display_name":"Specularity","score":0.6270999908447266},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5250999927520752},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.516700029373169},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.5149999856948853},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.48570001125335693},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.482699990272522},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4318000078201294},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41530001163482666},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.38019999861717224}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7174000144004822},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6601999998092651},{"id":"https://openalex.org/C2779456664","wikidata":"https://www.wikidata.org/wiki/Q972162","display_name":"Specularity","level":3,"score":0.6270999908447266},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5421000123023987},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5250999927520752},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.516700029373169},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.5149999856948853},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.48570001125335693},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.482699990272522},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4318000078201294},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41530001163482666},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.38019999861717224},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.37860000133514404},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.33390000462532043},{"id":"https://openalex.org/C200873422","wikidata":"https://www.wikidata.org/wiki/Q5448821","display_name":"Filling-in","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C50494287","wikidata":"https://www.wikidata.org/wiki/Q658467","display_name":"Texture synthesis","level":5,"score":0.30889999866485596},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3066999912261963},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C70587473","wikidata":"https://www.wikidata.org/wiki/Q7834111","display_name":"Transformative learning","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.27559998631477356},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C106430172","wikidata":"https://www.wikidata.org/wiki/Q6002272","display_name":"Image restoration","level":4,"score":0.2705000042915344},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.26829999685287476},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C79106606","wikidata":"https://www.wikidata.org/wiki/Q735197","display_name":"Afterimage","level":3,"score":0.2612999975681305}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i11.37827","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37827","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i11.37827","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37827","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,61],"propose":[1],"a":[2],"novel":[3],"Auto-Regressive":[4],"(AR)":[5],"image":[6,54,117,131],"generation":[7],"approach":[8],"that":[9],"models":[10,21],"images":[11],"as":[12],"hierarchical":[13],"compositions":[14],"of":[15,47,67],"interpretable":[16],"visual":[17],"layers.":[18],"While":[19],"AR":[20],"have":[22],"achieved":[23],"transformative":[24],"success":[25,31],"in":[26,32,41],"language":[27],"modeling,":[28],"replicating":[29],"this":[30],"vision":[33,48],"remains":[34],"challenging":[35],"due":[36],"to":[37],"inherent":[38],"spatial":[39],"dependencies":[40],"images.":[42],"Addressing":[43],"the":[44,63],"unique":[45],"challenges":[46],"tasks,":[49],"our":[50],"method":[51],"(CART)":[52],"adds":[53],"details":[55],"iteratively":[56],"via":[57,126],"semantically":[58],"meaningful":[59],"decompositions.":[60],"demonstrate":[62],"flexibility":[64],"and":[65,86,97,104],"generality":[66],"CART":[68,109],"by":[69],"applying":[70],"it":[71],"across":[72],"three":[73],"distinct":[74],"decomposition":[75],"strategies:":[76],"(i)":[77],"Base-Detail":[78],"Decomposition":[79,84,89],"(Mumford-Shah":[80],"smoothness),":[81],"(ii)":[82],"Intrinsic":[83],"(albedo/shading),":[85],"(iii)":[87],"Specularity":[88],"(diffuse/specular).":[90],"This":[91],"\u201cnext-detail\"":[92],"strategy":[93],"outperforms":[94],"traditional":[95],"\u201cnext-token\"":[96],"\u201cnext-scale\"":[98],"approaches,":[99],"improving":[100],"controllability,":[101],"semantic":[102],"interpretability,":[103],"resolution":[105],"scalability.":[106],"Experiments":[107],"show":[108],"generates":[110],"visually":[111],"compelling":[112],"results":[113],"while":[114],"enabling":[115],"structured":[116],"manipulation,":[118],"opening":[119],"new":[120],"directions":[121],"for":[122],"controllable":[123],"generative":[124],"modeling":[125],"physically":[127],"or":[128],"perceptually":[129],"motivated":[130],"factorization.":[132]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
