{"id":"https://openalex.org/W4415336479","doi":"https://doi.org/10.48550/arxiv.2509.24248","title":"SpecExit: Accelerating Large Reasoning Model via Speculative Exit","display_name":"SpecExit: Accelerating Large Reasoning Model via Speculative Exit","publication_year":2025,"publication_date":"2025-09-29","ids":{"openalex":"https://openalex.org/W4415336479","doi":"https://doi.org/10.48550/arxiv.2509.24248"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2509.24248","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.24248","pdf_url":"https://arxiv.org/pdf/2509.24248","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.24248","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yang, Rubing","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Rubing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bai, Huajun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Huajun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101930706","display_name":"Song Liu","orcid":"https://orcid.org/0000-0002-0851-3746"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Song","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014092511","display_name":"Guanghua Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Guanghua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Fan, Runzhi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Runzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Dang, Yanbin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dang, Yanbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120058099","display_name":"Jiejing Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiejing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045987910","display_name":"K.-H. Liu","orcid":"https://orcid.org/0009-0005-2024-1818"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059894156","display_name":"Jianchen Zhu","orcid":"https://orcid.org/0000-0002-5988-3704"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jianchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5054060382","display_name":"Peng Chen","orcid":"https://orcid.org/0000-0003-4577-9866"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Peng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.7562999725341797,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.7562999725341797,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6625999808311462},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5687000155448914},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.5001000165939331},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.4205999970436096},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4117000102996826},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3986000120639801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8008999824523926},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6625999808311462},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5687000155448914},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.5001000165939331},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.4205999970436096},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4117000102996826},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3986000120639801},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.3467999994754791},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3352999985218048},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C147297375","wikidata":"https://www.wikidata.org/wiki/Q6674930","display_name":"Look-ahead","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.25220000743865967}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2509.24248","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.24248","pdf_url":"https://arxiv.org/pdf/2509.24248","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2509.24248","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.24248","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.24248","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.24248","pdf_url":"https://arxiv.org/pdf/2509.24248","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"their":[1,28,60,71,77],"strong":[2],"performance":[3],"on":[4,57,62],"reasoning":[5,8,41],"tasks,":[6],"large":[7],"models":[9],"(LRMs)":[10],"often":[11],"suffer":[12],"from":[13,108,150],"overthinking,":[14,33],"producing":[15],"unnecessarily":[16],"long":[17],"outputs":[18],"and":[19,75,103,127],"incurring":[20],"high":[21],"end-to-end":[22,72,133],"latency,":[23],"a":[24,66,95,109,129],"significant":[25,119],"limitation":[26],"to":[27,39,136,153],"real-world":[29],"deployment.":[30],"To":[31],"address":[32],"early-exit":[34,105,156],"mechanisms":[35,64],"have":[36],"been":[37],"proposed":[38],"terminate":[40],"before":[42],"typical":[43],"completion,":[44],"showing":[45],"that":[46,69,98],"this":[47],"approach":[48],"can":[49],"effectively":[50],"shorten":[51],"generation":[52,123],"length":[53,124],"with":[54],"minimal":[55],"impact":[56],"accuracy.":[58,143],"However,":[59],"reliance":[61],"probing":[63,114],"introduces":[65],"detection":[67],"overhead":[68],"limits":[70],"latency":[73,134],"gains":[74],"compromises":[76],"generalizability":[78],"across":[79],"diverse":[80],"problems.":[81],"Inspired":[82],"by":[83,125],"the":[84,137,147],"use":[85,160],"of":[86,161],"hidden":[87,151,162],"states":[88,152,163],"in":[89,132],"speculative":[90,138],"decoding,":[91],"we":[92],"propose":[93],"SpecExit,":[94],"novel":[96],"framework":[97],"predicts":[99],"both":[100],"future":[101],"tokens":[102],"an":[104],"signal":[106],"directly":[107],"lightweight":[110],"draft":[111],"model":[112],"without":[113,141],"overhead.":[115],"Our":[116,144,167],"method":[117,145],"offers":[118],"improvements,":[120],"reducing":[121],"average":[122],"66\\%":[126],"achieving":[128],"2.5x":[130],"speedup":[131],"compared":[135],"decoding":[139],"baseline,":[140],"compromising":[142],"leverages":[146],"inherent":[148],"signals":[149],"provide":[154],"effective":[155],"signals,":[157],"suggesting":[158],"broader":[159],"for":[164],"efficient":[165],"reasoning.":[166],"code":[168],"is":[169],"available":[170],"at":[171],"https://github.com/Tencent/AngelSlim.":[172]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-19T00:00:00"}
