{"id":"https://openalex.org/W4403902989","doi":"https://doi.org/10.48550/arxiv.2410.03355","title":"LANTERN: Accelerating Visual Autoregressive Models with Relaxed Speculative Decoding","display_name":"LANTERN: Accelerating Visual Autoregressive Models with Relaxed Speculative Decoding","publication_year":2024,"publication_date":"2024-10-04","ids":{"openalex":"https://openalex.org/W4403902989","doi":"https://doi.org/10.48550/arxiv.2410.03355"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.03355","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.03355","pdf_url":"https://arxiv.org/pdf/2410.03355","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.03355","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114460248","display_name":"Doohyuk Jang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jang, Doohyuk","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108651659","display_name":"Sangup PARK","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Sihwan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yang, June Yong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, June Yong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jung, Yeonsung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jung, Yeonsung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076027607","display_name":"Junwoo Yun","orcid":"https://orcid.org/0009-0001-3838-8843"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yun, Jihun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087095284","display_name":"Souvik Kundu","orcid":"https://orcid.org/0000-0001-5815-8765"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kundu, Souvik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008949666","display_name":"Sung-Yub Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Sung-Yub","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5086698569","display_name":"Eunho Yang","orcid":"https://orcid.org/0000-0003-2188-0169"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Eunho","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5114460248"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9391999840736389,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.7836126089096069},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5959160923957825},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5227615833282471},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.3627300262451172},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.2947719693183899},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20056027173995972}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.7836126089096069},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5959160923957825},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5227615833282471},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3627300262451172},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.2947719693183899},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20056027173995972}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.03355","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.03355","pdf_url":"https://arxiv.org/pdf/2410.03355","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.03355","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.03355","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.03355","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.03355","pdf_url":"https://arxiv.org/pdf/2410.03355","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2171218219","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2156628102"],"abstract_inverted_index":{"Auto-Regressive":[0],"(AR)":[1],"models":[2,26,43,75,98,146],"have":[3],"recently":[4],"gained":[5],"prominence":[6],"in":[7,66,72,86,132,143,193],"image":[8,180],"generation,":[9],"often":[10],"matching":[11],"or":[12,46,182],"even":[13],"surpassing":[14],"the":[15,107,128,138,188,209],"performance":[16,108],"of":[17,24,109,130,140,152,190,208],"diffusion":[18],"models.":[19],"However,":[20],"one":[21,34],"major":[22],"limitation":[23],"AR":[25,74,97,145,236],"is":[27,240],"their":[28],"sequential":[29],"nature,":[30],"which":[31,89],"processes":[32],"tokens":[33,65,131,154],"at":[35,243],"a":[36,67,84,118,164,195,205,233],"time,":[37],"slowing":[38],"down":[39],"generation":[40],"compared":[41,203,221],"to":[42,104,123,204,222,231],"like":[44],"GANs":[45],"diffusion-based":[47],"methods":[48],"that":[49,126,155,171],"operate":[50],"more":[51,149],"efficiently.":[52],"While":[53],"speculative":[54,110,141,199,211],"decoding":[55,142,224],"has":[56],"proven":[57],"effective":[58],"for":[59],"accelerating":[60],"LLMs":[61],"by":[62,147,162,216],"generating":[63],"multiple":[64],"single":[68],"forward,":[69],"its":[70],"application":[71,207],"visual":[73,96,144,235],"remains":[76],"largely":[77],"unexplored.":[78],"In":[79,201],"this":[80,87,114],"work,":[81],"we":[82,90,116,169],"identify":[83],"challenge":[85],"setting,":[88],"term":[91],"\\textit{token":[92],"selection":[93],"ambiguity},":[94],"wherein":[95],"frequently":[99],"assign":[100],"uniformly":[101],"low":[102],"probabilities":[103],"tokens,":[105],"hampering":[106],"decoding.":[111,200],"To":[112],"overcome":[113],"challenge,":[115],"propose":[117],"relaxed":[119],"acceptance":[120],"condition":[121],"referred":[122],"as":[124,220],"LANTERN":[125,213],"leverages":[127],"interchangeability":[129],"latent":[133],"space.":[134],"This":[135],"relaxation":[136],"restores":[137],"effectiveness":[139],"enabling":[148],"flexible":[150],"use":[151],"candidate":[153],"would":[156],"otherwise":[157],"be":[158],"prematurely":[159],"rejected.":[160],"Furthermore,":[161],"incorporating":[163],"total":[165],"variation":[166],"distance":[167],"bound,":[168],"ensure":[170],"these":[172],"speed":[173],"gains":[174],"are":[175],"achieved":[176],"without":[177],"significantly":[178],"compromising":[179],"quality":[181],"semantic":[183],"coherence.":[184],"Experimental":[185],"results":[186],"demonstrate":[187],"efficacy":[189],"our":[191],"method":[192],"providing":[194],"substantial":[196],"speed-up":[197],"over":[198],"specific,":[202],"na\u00efve":[206],"state-of-the-art":[210],"decoding,":[212],"increases":[214],"speed-ups":[215],"$\\mathbf{1.75}\\times$":[217],"and":[218,225],"$\\mathbf{1.82}\\times$,":[219],"greedy":[223],"random":[226],"sampling,":[227],"respectively,":[228],"when":[229],"applied":[230],"LlamaGen,":[232],"contemporary":[234],"model.":[237],"The":[238],"code":[239],"publicly":[241],"available":[242],"https://github.com/jadohu/LANTERN.":[244]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
