{"id":"https://openalex.org/W7159687670","doi":"https://doi.org/10.48550/arxiv.2604.28020","title":"Cost-Aware Learning","display_name":"Cost-Aware Learning","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159687670","doi":"https://doi.org/10.48550/arxiv.2604.28020"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.28020","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28020","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.28020","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134929994","display_name":"Clara Mohri","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mohri, Clara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047817959","display_name":"Amir Globerson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Globerson, Amir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006699796","display_name":"Haim Kaplan","orcid":"https://orcid.org/0000-0001-9586-8002"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaplan, Haim","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053775980","display_name":"Tomer Koren","orcid":"https://orcid.org/0000-0002-9061-0448"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koren, Tomer","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134950644","display_name":"Yishay Mansour","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mansour, Yishay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5134929994"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.48420000076293945,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.48420000076293945,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.2822999954223633,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.03849999979138374,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5945000052452087},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5151000022888184},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.49720001220703125},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4887000024318695},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4593000113964081},{"id":"https://openalex.org/keywords/empirical-risk-minimization","display_name":"Empirical risk minimization","score":0.43630000948905945},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.42590001225471497},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.42239999771118164},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.40939998626708984}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5945000052452087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5771999955177307},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5335999727249146},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5151000022888184},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.49720001220703125},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4887000024318695},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4593000113964081},{"id":"https://openalex.org/C107321475","wikidata":"https://www.wikidata.org/wiki/Q5374254","display_name":"Empirical risk minimization","level":2,"score":0.43630000948905945},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.42590001225471497},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.42239999771118164},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.40939998626708984},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4092999994754791},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.38999998569488525},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38830000162124634},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3528999984264374},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.3513999879360199},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.31360000371932983},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2921000123023987},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.2833000123500824},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2824999988079071},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C145446738","wikidata":"https://www.wikidata.org/wiki/Q319913","display_name":"Convex function","level":3,"score":0.2757999897003174},{"id":"https://openalex.org/C148764684","wikidata":"https://www.wikidata.org/wiki/Q621751","display_name":"Approximation algorithm","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.28020","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28020","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.28020","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28020","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,31,48,71,93],"consider":[1],"the":[2,28,83,96,128],"problem":[3],"of":[4,11,53,63,86],"Cost-Aware":[5,33,112],"Learning,":[6],"where":[7,82],"sampling":[8],"different":[9,16],"components":[10],"a":[12,23,37,50,61,68,101],"finite-sum":[13],"objective":[14,19],"incurs":[15],"costs.":[17],"The":[18],"is":[20],"to":[21,45,76,110],"reach":[22],"target":[24],"error":[25],"while":[26,134],"minimizing":[27],"total":[29],"cost.":[30],"propose":[32],"SGD,":[34],"which":[35],"uses":[36],"distribution":[38,64],"based":[39],"on":[40,116],"gradient":[41,105],"norms":[42],"and":[43,67,107,119],"costs":[44],"sample":[46],"components.":[47],"provide":[49],"thorough":[51],"analysis":[52],"this":[54,109,124],"algorithm,":[55],"including":[56],"cost-improvement":[57],"bounds":[58],"over":[59],"baselines,":[60],"characterization":[62],"proxy":[65,103],"sub-optimality,":[66],"lower":[69],"bound.":[70],"apply":[72],"our":[73],"theoretical":[74],"insights":[75],"reinforcement":[77],"learning":[78],"with":[79,91],"language":[80],"models,":[81],"computational":[84],"cost":[85],"sequence-level":[87],"policy":[88,132],"gradients":[89],"varies":[90],"length.":[92],"find":[94],"that":[95,123],"advantage":[97],"magnitude":[98],"serves":[99],"as":[100],"high-fidelity":[102],"for":[104],"norms,":[106],"use":[108],"introduce":[111],"GRPO.":[113],"Empirical":[114],"results":[115],"1.5B,":[117],"4B,":[118],"8B":[120],"LLMs":[121],"demonstrate":[122],"algorithm":[125],"significantly":[126],"reduces":[127],"tokens":[129],"used":[130],"in":[131],"optimization":[133],"matching":[135],"or":[136],"exceeding":[137],"baseline":[138],"accuracy.":[139]},"counts_by_year":[],"updated_date":"2026-06-02T06:17:35.589633","created_date":"2026-05-02T00:00:00"}
