{"id":"https://openalex.org/W7134829092","doi":"https://doi.org/10.48550/arxiv.2603.06666","title":"SJD-PV: Speculative Jacobi Decoding with Phrase Verification for Autoregressive Image Generation","display_name":"SJD-PV: Speculative Jacobi Decoding with Phrase Verification for Autoregressive Image Generation","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7134829092","doi":"https://doi.org/10.48550/arxiv.2603.06666"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.06666","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128648987","display_name":"Zhehao Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yu, Zhehao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128681233","display_name":"Baoquan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Baoquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128662335","display_name":"Bingqi Shan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Bingqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083694367","display_name":"Xinhao Liu","orcid":"https://orcid.org/0000-0002-5640-1837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xinhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048409282","display_name":"Dongliang Zhou","orcid":"https://orcid.org/0000-0003-0361-8597"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Dongliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113395536","display_name":"Guotao Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Guotao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053024025","display_name":"Guangming Ye","orcid":"https://orcid.org/0000-0003-0599-5310"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Guangming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128667651","display_name":"Yunming Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Yunming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5128648987"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8708999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8708999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0731000006198883,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.006000000052154064,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.8270000219345093},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.777899980545044},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7682999968528748},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.5099999904632568},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.444599986076355},{"id":"https://openalex.org/keywords/independence","display_name":"Independence (probability theory)","score":0.3756999969482422},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36649999022483826},{"id":"https://openalex.org/keywords/signature","display_name":"Signature (topology)","score":0.36480000615119934}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.8270000219345093},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.777899980545044},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7682999968528748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7368000149726868},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.5099999904632568},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5088000297546387},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.444599986076355},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4271000027656555},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42170000076293945},{"id":"https://openalex.org/C35651441","wikidata":"https://www.wikidata.org/wiki/Q625303","display_name":"Independence (probability theory)","level":2,"score":0.3756999969482422},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36649999022483826},{"id":"https://openalex.org/C2779696439","wikidata":"https://www.wikidata.org/wiki/Q7512811","display_name":"Signature (topology)","level":2,"score":0.36480000615119934},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.3418000042438507},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.3255000114440918},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2766000032424927},{"id":"https://openalex.org/C205203396","wikidata":"https://www.wikidata.org/wiki/Q612143","display_name":"Bilinear interpolation","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2727000117301941},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.2590000033378601}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.06666","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.06666","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.06666","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.06666","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Autoregressive":[0],"(AR)":[1],"image":[2],"models":[3],"have":[4],"recently":[5],"demonstrated":[6],"remarkable":[7],"generative":[8],"capability,":[9],"but":[10],"their":[11],"sequential":[12],"nature":[13],"results":[14],"in":[15],"significant":[16],"inference":[17],"latency.":[18],"Existing":[19],"training-free":[20,55],"acceleration":[21,56],"methods":[22],"typically":[23],"verify":[24],"tokens":[25,71,94,118],"independently,":[26],"overlooking":[27],"the":[28,64,87,102,135],"strong":[29],"co-occurrence":[30,84,159],"patterns":[31],"between":[32],"adjacent":[33],"visual":[34,98,150],"tokens.":[35],"This":[36],"independence":[37],"assumption":[38],"often":[39],"leads":[40],"to":[41,66,144],"contextual":[42],"inconsistency":[43],"and":[44,90,141,163],"limits":[45],"decoding":[46,74,147],"efficiency.":[47],"In":[48],"this":[49],"work,":[50],"we":[51,81],"introduce":[52],"a":[53],"novel":[54],"framework":[57],"that":[58,130,155],"performs":[59],"phrase-level":[60,104],"speculative":[61],"verification,":[62],"enabling":[63],"model":[65],"jointly":[67],"validate":[68],"multiple":[69,117],"correlated":[70],"within":[72],"each":[73,111],"window.":[75],"To":[76],"construct":[77],"such":[78],"phrase":[79],"units,":[80],"analyze":[82],"token":[83,158],"statistics":[85],"from":[86],"training":[88],"corpus":[89],"group":[91],"frequently":[92],"co-occurring":[93],"into":[95],"semantically":[96],"coherent":[97],"phrases.":[99],"During":[100],"inference,":[101],"proposed":[103],"verification":[105],"evaluates":[106],"aggregated":[107],"likelihood":[108],"ratios":[109],"over":[110],"phrase,":[112],"allowing":[113],"simultaneous":[114],"acceptance":[115],"of":[116,137],"while":[119],"preserving":[120],"generation":[121,128],"quality.":[122],"Extensive":[123],"experiments":[124],"on":[125],"autoregressive":[126,168],"text-to-image":[127],"show":[129],"our":[131],"method":[132],"significantly":[133],"reduces":[134],"number":[136],"function":[138],"evaluations":[139],"(NFE)":[140],"achieves":[142],"up":[143],"30%":[145],"faster":[146],"without":[148],"compromising":[149],"fidelity.":[151],"Our":[152],"findings":[153],"reveal":[154],"modeling":[156],"short-range":[157],"provides":[160],"an":[161],"effective":[162],"general":[164],"principle":[165],"for":[166],"accelerating":[167],"inference.":[169]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-11T00:00:00"}
