{"id":"https://openalex.org/W7154206122","doi":"https://doi.org/10.48550/arxiv.2604.11089","title":"Structured State-Space Regularization for Generation-Friendly Image Tokenization","display_name":"Structured State-Space Regularization for Generation-Friendly Image Tokenization","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154206122","doi":"https://doi.org/10.48550/arxiv.2604.11089"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.11089","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11089","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.11089","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032201384","display_name":"Jinsung Lee","orcid":"https://orcid.org/0000-0001-9192-6623"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Jinsung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133622584","display_name":"Jaemin Oh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oh, Jaemin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037371213","display_name":"\uae40\ub0a8\ud6c8","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Namhun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105687650","display_name":"Dongwon Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Dongwon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081620315","display_name":"Byung-Jun Yoon","orcid":"https://orcid.org/0000-0001-9328-1101"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoon, Byung-Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101352002","display_name":"Suha Kwak","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwak, Suha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.0031999999191612005,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.0017999999690800905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5831000208854675},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5615000128746033},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5443999767303467},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4715999960899353},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4562000036239624},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.43459999561309814},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4311000108718872},{"id":"https://openalex.org/keywords/latent-image","display_name":"Latent image","score":0.39800000190734863}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6559000015258789},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.619700014591217},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5831000208854675},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5615000128746033},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5443999767303467},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4715999960899353},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4562000036239624},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.43459999561309814},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4311000108718872},{"id":"https://openalex.org/C205372313","wikidata":"https://www.wikidata.org/wiki/Q355645","display_name":"Latent image","level":3,"score":0.39800000190734863},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3822999894618988},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.37310001254081726},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.335099995136261},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.3296000063419342},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2802000045776367},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.27950000762939453},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2727000117301941},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26579999923706055},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.11089","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11089","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.11089","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11089","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Image":[0],"tokenizers":[1,120],"play":[2],"a":[3,51,63,74,95],"central":[4],"role":[5],"in":[6,58,94,126],"modern":[7],"generative":[8,116],"models,":[9],"where":[10],"the":[11,14,19,35,90,100,115],"structure":[12,57,106],"of":[13,28,84,107,118],"latent":[15,30,59,101],"space":[16,102],"critically":[17],"determines":[18],"downstream":[20],"generation":[21],"performance.":[22],"A":[23],"key":[24],"but":[25],"underexplored":[26],"property":[27],"effective":[29],"representations":[31],"is":[32],"spectral":[33,56,105],"organization,":[34],"ability":[36],"to":[37,54,88,103],"encode":[38],"information":[39],"across":[40],"frequency":[41,91],"components.":[42],"In":[43],"this":[44],"work,":[45],"we":[46],"introduce":[47],"structured":[48],"state-space":[49,68],"regularization,":[50],"principled":[52],"approach":[53],"inducing":[55],"spaces.":[60],"We":[61],"derive":[62],"regularization":[64],"objective":[65],"by":[66],"revisiting":[67],"models":[69],"(SSMs)":[70],"as":[71],"systems":[72],"mimicking":[73],"basis":[75],"function's":[76],"behavior.":[77],"This":[78],"perspective":[79],"reveals":[80],"that":[81,98,111],"hidden":[82],"states":[83],"SSMs":[85],"are":[86],"induced":[87],"capture":[89,104],"components,":[92],"resulting":[93],"novel":[96],"regularizer":[97,113],"enforces":[99],"images.":[108],"Experiments":[109],"demonstrate":[110],"our":[112],"improves":[114],"performance":[117],"image":[119],"while":[121],"incurring":[122],"only":[123],"minimal":[124],"loss":[125],"their":[127],"reconstruction":[128],"fidelity.":[129]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-15T00:00:00"}
