{"id":"https://openalex.org/W7140277240","doi":"https://doi.org/10.48550/arxiv.2603.22304","title":"Mitigating Premature Discretization with Progressive Quantization for Robust Vector Tokenization","display_name":"Mitigating Premature Discretization with Progressive Quantization for Robust Vector Tokenization","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7140277240","doi":"https://doi.org/10.48550/arxiv.2603.22304"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.22304","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22304","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.22304","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130599998","display_name":"Wenhao Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Wenhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000954123","display_name":"Qiran Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Qiran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024900991","display_name":"Zhouhan Lin","orcid":"https://orcid.org/0009-0009-7204-0689"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Zhouhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130573584","display_name":"Dianbo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Dianbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.32760000228881836,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.32760000228881836,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.13379999995231628,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06830000132322311,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.791100025177002},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6669999957084656},{"id":"https://openalex.org/keywords/codebook","display_name":"Codebook","score":0.5525000095367432},{"id":"https://openalex.org/keywords/linde\u2013buzo\u2013gray-algorithm","display_name":"Linde\u2013Buzo\u2013Gray algorithm","score":0.4878000020980835},{"id":"https://openalex.org/keywords/discretization","display_name":"Discretization","score":0.4715000092983246},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3513999879360199}],"concepts":[{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.791100025177002},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6783999800682068},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6669999957084656},{"id":"https://openalex.org/C127759330","wikidata":"https://www.wikidata.org/wiki/Q637416","display_name":"Codebook","level":2,"score":0.5525000095367432},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5382000207901001},{"id":"https://openalex.org/C93372532","wikidata":"https://www.wikidata.org/wiki/Q6552455","display_name":"Linde\u2013Buzo\u2013Gray algorithm","level":3,"score":0.4878000020980835},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.4715000092983246},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42010000348091125},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.30230000615119934},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.2985999882221222},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.29249998927116394},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27219998836517334},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.22304","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22304","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.22304","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22304","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vector":[0],"Quantization":[1,51],"(VQ)":[2],"has":[3,33],"become":[4],"the":[5,31,35,55,91,94,101,117,123,149],"cornerstone":[6],"of":[7,57,104],"tokenization":[8,147],"for":[9,126,134,144],"many":[10],"multimodal":[11],"Large":[12],"Language":[13],"Models":[14],"and":[15,113,119],"diffusion":[16],"synthesis.":[17],"However,":[18],"existing":[19],"VQ":[20,68],"paradigms":[21],"suffer":[22],"from":[23,79],"a":[24,61,74,80,85,140],"fundamental":[25,62],"conflict:":[26],"they":[27],"enforce":[28],"discretization":[29],"before":[30],"encoder":[32],"captured":[34],"underlying":[36],"data":[37],"manifold.":[38],"We":[39,109],"term":[40],"this":[41],"phenomenon":[42],"Premature":[43],"Discretization.":[44],"To":[45],"resolve":[46],"this,":[47],"we":[48],"propose":[49],"Progressive":[50],"(ProVQ),":[52],"which":[53],"incorporates":[54],"dynamics":[56],"quantization":[58,72],"hardness":[59],"as":[60,73],"yet":[63],"previously":[64],"overlooked":[65],"axis":[66],"in":[67],"training.":[69],"By":[70],"treating":[71],"curriculum":[75],"that":[76],"smoothly":[77],"anneals":[78],"continuous":[81],"latent":[82],"space":[83],"to":[84],"discrete":[86],"one,":[87],"ProVQ":[88,105,130],"effectively":[89],"guides":[90],"codebook":[92],"toward":[93],"well-expanded":[95],"manifolds.":[96],"Extensive":[97],"experimental":[98],"results":[99],"demonstrate":[100],"broad":[102],"effectiveness":[103],"across":[106],"diverse":[107],"modalities.":[108],"report":[110],"improved":[111],"reconstruction":[112],"generative":[114,127],"performance":[115,142],"on":[116,148],"ImageNet-1K":[118],"ImageNet-100":[120],"benchmarks,":[121],"highlighting":[122],"ProVQ's":[124],"boost":[125],"modeling.":[128],"Furthermore,":[129],"proves":[131],"highly":[132],"effective":[133],"modeling":[135],"complex":[136],"biological":[137],"sequences,":[138],"establishing":[139],"new":[141],"ceiling":[143],"protein":[145],"structure":[146],"StrutTokenBench":[150],"leaderboard.":[151]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-26T00:00:00"}
