{"id":"https://openalex.org/W7138259309","doi":"https://doi.org/10.1609/aaai.v40i9.37614","title":"Improved Masked Image Generation with Knowledge-Augmented Token Representations","display_name":"Improved Masked Image Generation with Knowledge-Augmented Token Representations","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138259309","doi":"https://doi.org/10.1609/aaai.v40i9.37614"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i9.37614","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i9.37614","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i9.37614","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113395536","display_name":"Guotao Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guotao Liang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen\nPengcheng Laboratory"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen\nPengcheng Laboratory","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128681233","display_name":"Baoquan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baoquan Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129727609","display_name":"Zhiyuan Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Wen","raw_affiliation_strings":["Pengcheng Laboratory"],"affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101195298","display_name":"Zihao Han","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihao Han","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129709829","display_name":"Yunming Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunming Ye","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen\nPengcheng Laboratory"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen\nPengcheng Laboratory","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5113395536"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.60895522,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"9","first_page":"6817","last_page":"6825"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9258000254631042,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9258000254631042,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.01759999990463257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.011699999682605267,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7694000005722046},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5770000219345093},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5020999908447266},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.45329999923706055},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.43939998745918274},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4203999936580658},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.41029998660087585}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7694000005722046},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7678999900817871},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.583299994468689},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5770000219345093},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5020999908447266},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.45329999923706055},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.43939998745918274},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4203999936580658},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.41029998660087585},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35440000891685486},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.30880001187324524},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28459998965263367},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.28139999508857727},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2727000117301941},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2619999945163727}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i9.37614","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i9.37614","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i9.37614","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i9.37614","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Masked":[0,66],"image":[1,196],"generation":[2,182,197],"(MIG)":[3],"has":[4],"demonstrated":[5],"remarkable":[6],"efficiency":[7],"and":[8,51,99,111,123,142],"high-fidelity":[9],"images":[10],"by":[11],"enabling":[12],"parallel":[13],"token":[14,31,105,141],"prediction.":[15],"Existing":[16],"methods":[17],"typically":[18],"rely":[19],"solely":[20],"on":[21,129,198],"the":[22,44,83,116,119,124,159,172],"model":[23],"itself":[24],"to":[25,88,139,153,164,175,180],"learn":[26,89,140],"semantic":[27,37,49,78,120,177],"dependencies":[28,38,79],"among":[29],"visual":[30],"sequences.":[32],"However,":[33],"directly":[34],"learning":[35],"such":[36,165],"from":[39,82],"data":[40],"is":[41,151],"challenging":[42],"because":[43],"individual":[45],"tokens":[46],"lack":[47],"clear":[48],"meanings,":[50],"these":[52,155],"sequences":[53],"are":[54],"usually":[55],"long.":[56],"To":[57],"address":[58],"this":[59],"limitation,":[60],"we":[61,97,134],"propose":[62],"a":[63,136,147],"novel":[64],"Knowledge-Augmented":[65],"Image":[67],"Generation":[68],"framework,":[69],"named":[70],"KA-MIG,":[71],"which":[72],"introduces":[73],"explicit":[74],"knowledge":[75,106,132],"of":[76,103],"token-level":[77],"(i.e.,":[80,115],"extracted":[81],"training":[84],"data)":[85],"as":[86],"priors":[87],"richer":[90],"representations":[91,157],"for":[92,194],"improving":[93],"performance.":[94],"In":[95],"particular,":[96],"explore":[98],"identify":[100],"three":[101,130],"types":[102],"advantageous":[104],"graphs,":[107,133],"including":[108],"two":[109],"positive":[110],"one":[112],"negative":[113],"graphs":[114],"co-occurrence":[117],"graph,":[118,122],"similarity":[121],"position-token":[125],"incompatibility":[126],"graph).":[127],"Based":[128],"prior":[131,166],"design":[135],"graph-aware":[137],"encoder":[138],"position-aware":[143],"representations.":[144],"After":[145],"that,":[146],"lightweight":[148],"fusion":[149],"mechanism":[150],"introduced":[152],"integrate":[154],"enriched":[156],"into":[158],"existing":[160,192],"MIG":[161,193],"methods.":[162],"Resorting":[163],"knowledge,":[167],"our":[168,188],"method":[169,189],"effectively":[170],"enhances":[171],"model's":[173],"ability":[174],"capture":[176],"dependencies,":[178],"leading":[179],"improved":[181],"quality.":[183],"Experimental":[184],"results":[185],"demonstrate":[186],"that":[187],"improves":[190],"upon":[191],"class-conditional":[195],"ImageNet.":[199]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
