{"id":"https://openalex.org/W7133328361","doi":"https://doi.org/10.48550/arxiv.2603.00846","title":"Tiny-Critic RAG: Empowering Agentic Fallback with Parameter-Efficient Small Language Models","display_name":"Tiny-Critic RAG: Empowering Agentic Fallback with Parameter-Efficient Small Language Models","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7133328361","doi":"https://doi.org/10.48550/arxiv.2603.00846"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00846","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00846","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00846","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128033813","display_name":"Yichao Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wu, Yichao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127896390","display_name":"Penghao Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Penghao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109734482","display_name":"Yafei Xiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiang, Yafei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127910971","display_name":"Mengwei Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Mengwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127894741","display_name":"Jianan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jianan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127931801","display_name":"Jing Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034388453","display_name":"Xianyou Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xianyou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102381470","display_name":"Weiran Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Weiran","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5128033813"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.28600001335144043,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.28600001335144043,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.2337000072002411,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.046799998730421066,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6466000080108643},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.5802000164985657},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5253999829292297},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4447999894618988},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4146000146865845},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4027999937534332},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.3783999979496002},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3718999922275543}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7282999753952026},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6466000080108643},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.5802000164985657},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5253999829292297},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4447999894618988},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4146000146865845},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4027999937534332},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3783999979496002},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3718999922275543},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37070000171661377},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C193856179","wikidata":"https://www.wikidata.org/wiki/Q5251100","display_name":"Defeasible estate","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C78023250","wikidata":"https://www.wikidata.org/wiki/Q657596","display_name":"Unary operation","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.28290000557899475},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2782000005245209},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26420000195503235},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25940001010894775},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2590000033378601},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C197855036","wikidata":"https://www.wikidata.org/wiki/Q380172","display_name":"Binary tree","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00846","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00846","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00846","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00846","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Retrieval-Augmented":[0],"Generation":[1],"(RAG)":[2],"grounds":[3],"Large":[4],"Language":[5,98],"Models":[6],"(LLMs)":[7],"to":[8,18,71,134],"mitigate":[9],"factual":[10],"hallucinations.":[11],"Recent":[12],"paradigms":[13],"shift":[14],"from":[15],"static":[16],"pipelines":[17],"Modular":[19],"and":[20,78,85,114],"Agentic":[21],"RAG":[22,35,129],"frameworks,":[23],"granting":[24],"models":[25,53,70],"autonomy":[26],"for":[27,51,55,118,149],"multi-hop":[28],"reasoning":[29,77],"or":[30],"self-correction.":[31],"However,":[32],"current":[33],"reflective":[34],"heavily":[36],"relies":[37],"on":[38,75,124],"massive":[39],"LLMs":[40],"as":[41,106],"universal":[42],"evaluators.":[43],"In":[44],"high-throughput":[45],"systems,":[46],"executing":[47],"complete":[48],"forward":[49],"passes":[50],"billion-parameter":[52],"merely":[54],"binary":[56,121],"routing":[57,131],"introduces":[58],"severe":[59],"computational":[60],"redundancy.":[61],"Furthermore,":[62],"in":[63],"autonomous":[64],"agent":[65,150],"scenarios,":[66],"inaccurate":[67],"retrieval":[68],"causes":[69],"expend":[72],"excessive":[73],"tokens":[74],"spurious":[76],"redundant":[79],"tool":[80],"calls,":[81],"inflating":[82],"Time-to-First-Token":[83],"(TTFT)":[84],"costs.":[86],"We":[87],"propose":[88],"Tiny-Critic":[89,110,128],"RAG,":[90],"decoupling":[91],"evaluation":[92],"by":[93,139],"deploying":[94],"a":[95,107,145],"parameter-efficient":[96],"Small":[97],"Model":[99],"(SLM)":[100],"via":[101],"Low-Rank":[102],"Adaptation":[103],"(LoRA).":[104],"Acting":[105],"deterministic":[108],"gatekeeper,":[109],"employs":[111],"constrained":[112],"decoding":[113],"non-thinking":[115],"inference":[116],"modes":[117],"ultra-low":[119],"latency":[120,138],"routing.":[122],"Evaluations":[123],"noise-injected":[125],"datasets":[126],"demonstrate":[127],"achieves":[130],"accuracy":[132],"comparable":[133],"GPT-4o-mini":[135],"while":[136],"reducing":[137],"an":[140],"order":[141],"of":[142],"magnitude,":[143],"establishing":[144],"highly":[146],"cost-effective":[147],"paradigm":[148],"deployment.":[151]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
