{"id":"https://openalex.org/W7140118267","doi":"https://doi.org/10.48550/arxiv.2603.19415","title":"Scalable Prompt Routing via Fine-Grained Latent Task Discovery","display_name":"Scalable Prompt Routing via Fine-Grained Latent Task Discovery","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7140118267","doi":"https://doi.org/10.48550/arxiv.2603.19415"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.19415","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19415","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.19415","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130356104","display_name":"Yunyi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Yunyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126219525","display_name":"Soji Adeshina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adeshina, Soji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130348356","display_name":"Patrick Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Sheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130372319","display_name":"Ashwin Ganesh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ganesh, Ashwin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130353160","display_name":"Zhen Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Zhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023028617","display_name":"Vassilis N. Ioannidis","orcid":"https://orcid.org/0000-0002-8367-0733"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ioannidis, Vassilis N.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130381919","display_name":"Huzefa Rangwala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rangwala, Huzefa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110359290","display_name":"George Karypis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karypis, George","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5130356104"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.2551000118255615,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.2551000118255615,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2264000028371811,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.14059999585151672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6430000066757202},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5561000108718872},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5019000172615051},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4796000123023987},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.4514000117778778},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.3582000136375427},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.33799999952316284},{"id":"https://openalex.org/keywords/multiple-models","display_name":"Multiple Models","score":0.3343999981880188}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.824400007724762},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6430000066757202},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5561000108718872},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5019000172615051},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4796000123023987},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.4514000117778778},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4431000053882599},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.435699999332428},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4242999851703644},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C2779714256","wikidata":"https://www.wikidata.org/wiki/Q25305062","display_name":"Multiple Models","level":2,"score":0.3343999981880188},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3273000121116638},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.32710000872612},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2671999931335449},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.26159998774528503}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.19415","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19415","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.19415","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19415","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Prompt":[0],"routing":[1,66],"dynamically":[2],"selects":[3],"the":[4,148],"most":[5],"appropriate":[6],"large":[7],"language":[8],"model":[9,24,151],"from":[10,122],"a":[11,64,94,106],"pool":[12],"of":[13,30],"candidates":[14],"for":[15,113],"each":[16],"query,":[17],"optimizing":[18],"performance":[19,35],"while":[20,51,152],"managing":[21],"costs.":[22],"As":[23],"pools":[25],"scale":[26],"to":[27,55,87,96,99,125],"include":[28],"dozens":[29],"frontier":[31,138],"models":[32],"with":[33,109,129,136],"narrow":[34],"gaps,":[36],"existing":[37,144],"approaches":[38],"face":[39],"significant":[40],"challenges:":[41],"manually":[42],"defined":[43],"task":[44,75,90],"taxonomies":[45],"cannot":[46],"capture":[47],"fine-grained":[48,74],"capability":[49],"distinctions,":[50],"monolithic":[52],"routers":[53],"struggle":[54],"differentiate":[56],"subtle":[57],"differences":[58],"across":[59],"diverse":[60],"tasks.":[61,101],"We":[62],"propose":[63],"two-stage":[65],"architecture":[67,108],"that":[68],"addresses":[69],"these":[70],"limitations":[71],"through":[72],"automated":[73],"discovery":[76],"and":[77,92,146],"task-aware":[78],"quality":[79,115],"estimation.":[80],"Our":[81],"first":[82],"stage":[83,104],"employs":[84],"graph-based":[85],"clustering":[86],"discover":[88],"latent":[89],"types":[91],"trains":[93],"classifier":[95],"assign":[97],"prompts":[98],"discovered":[100],"The":[102],"second":[103],"uses":[105],"mixture-of-experts":[107],"task-specific":[110],"prediction":[111],"heads":[112],"specialized":[114],"estimates.":[116],"At":[117],"inference,":[118],"we":[119],"aggregate":[120],"predictions":[121],"both":[123],"stages":[124],"balance":[126],"task-level":[127],"stability":[128],"prompt-specific":[130],"adaptability.":[131],"Evaluated":[132],"on":[133],"10":[134],"benchmarks":[135],"11":[137],"models,":[139],"our":[140],"method":[141],"consistently":[142],"outperforms":[143],"baselines":[145],"surpasses":[147],"strongest":[149],"individual":[150],"incurring":[153],"less":[154],"than":[155],"half":[156],"its":[157],"cost.":[158]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-24T00:00:00"}
