{"id":"https://openalex.org/W7154393654","doi":"https://doi.org/10.48550/arxiv.2604.10387","title":"Leveraging Mathematical Reasoning of LLMs for Efficient GPU Thread Mapping","display_name":"Leveraging Mathematical Reasoning of LLMs for Efficient GPU Thread Mapping","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7154393654","doi":"https://doi.org/10.48550/arxiv.2604.10387"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10387","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10387","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005567535","display_name":"Jose Maureira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maureira, Jose","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088815725","display_name":"Crist\u00f3bal A. Navarro","orcid":"https://orcid.org/0000-0001-7090-9904"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Navarro, Crist\u00f3bal A.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085478645","display_name":"H\u00e9ctor Ferrada","orcid":"https://orcid.org/0000-0002-8334-4540"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferrada, Hector","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5041812306","display_name":"Luis Veas-Castillo","orcid":"https://orcid.org/0000-0003-3490-9995"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Veas-Castillo, Luis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8392000198364258,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8392000198364258,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.031599998474121094,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.02199999988079071,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bespoke","display_name":"Bespoke","score":0.6184999942779541},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5825999975204468},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5126000046730042},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.4902999997138977},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46070000529289246},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.45019999146461487},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4011000096797943},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.37540000677108765}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7882999777793884},{"id":"https://openalex.org/C44210515","wikidata":"https://www.wikidata.org/wiki/Q16968978","display_name":"Bespoke","level":2,"score":0.6184999942779541},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5825999975204468},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5126000046730042},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.4902999997138977},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46070000529289246},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.45019999146461487},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4390999972820282},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4011000096797943},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3734999895095825},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.3271999955177307},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C176809094","wikidata":"https://www.wikidata.org/wiki/Q15401496","display_name":"Traverse","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C127964446","wikidata":"https://www.wikidata.org/wiki/Q1092142","display_name":"Computational resource","level":3,"score":0.28780001401901245},{"id":"https://openalex.org/C60011546","wikidata":"https://www.wikidata.org/wiki/Q932996","display_name":"Parallel coordinates","level":4,"score":0.2849000096321106},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.27720001339912415},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C2775937380","wikidata":"https://www.wikidata.org/wiki/Q1232589","display_name":"Replica","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10387","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10387","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10387","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6449534893035889,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Mapping":[0],"parallel":[1],"threads":[2],"onto":[3],"non-box-shaped":[4],"domains":[5,75,99],"is":[6,145],"a":[7,42,69,132,146,182,208],"known":[8],"challenge":[9],"in":[10],"GPU":[11,177,215],"computing;":[12],"efficient":[13],"mapping":[14,34,93],"prevents":[15],"performance":[16],"penalties":[17],"from":[18],"unnecessary":[19],"resource":[20,216],"allocation.":[21],"Currently,":[22],"achieving":[23],"this":[24,56,116,144],"requires":[25],"significant":[26],"analytical":[27,154],"human":[28],"effort":[29],"to":[30,54,168],"manually":[31],"derive":[32],"bespoke":[33],"functions":[35],"for":[36,95,138],"each":[37],"geometry.":[38],"This":[39,198],"work":[40],"introduces":[41],"novel":[43],"approach":[44,117],"leveraging":[45],"the":[46,112,123,152,195,201],"symbolic":[47,106],"reasoning":[48],"of":[49,76,115,204],"Large":[50],"Language":[51],"Models":[52],"(LLMs)":[53],"automate":[55],"derivation":[57],"entirely":[58],"through":[59],"in-context":[60],"learning.":[61],"Focusing":[62],"on":[63,118],"state-of-the-art":[64],"open-weights":[65],"models,":[66],"we":[67,110,180],"conducted":[68],"rigorous":[70],"comparative":[71],"analysis":[72],"across":[73],"spatial":[74],"increasing":[77],"complexity.":[78],"Our":[79],"results":[80],"demonstrate":[81],"that":[82],"modern":[83],"local":[84],"LLMs":[85],"successfully":[86],"infer":[87],"exact":[88],"O(1)":[89],"and":[90,100,125,163,171],"O(log":[91],"N)":[92],"equations":[94],"complex":[96],"2D/3D":[97],"dense":[98],"2D":[101],"fractals,":[102],"vastly":[103],"outperforming":[104],"traditional":[105],"regression":[107],"methods.":[108],"Crucially,":[109],"profile":[111],"energetic":[113],"viability":[114],"high-performance":[119],"infrastructure,":[120],"distinguishing":[121],"between":[122],"code-generation":[124],"execution":[126],"phases.":[127],"While":[128],"one-time":[129],"inference":[130],"incurs":[131],"high":[133],"energy":[134,162,173],"penalty":[135],"--":[136,143],"particularly":[137],"reasoning-focused":[139],"models":[140,188],"like":[141],"DeepSeek-R1":[142],"single":[147],"upfront":[148],"investment.":[149],"Once":[150],"integrated,":[151],"generated":[153],"kernels":[155],"eliminate":[156],"block":[157],"waste":[158],"entirely,":[159],"yielding":[160],"massive":[161],"time":[164],"savings":[165],"(e.g.,":[166,194],"up":[167],"4833x":[169],"speedup":[170],"2890x":[172],"reduction)":[174],"during":[175],"actual":[176],"workloads.":[178],"Finally,":[179],"identify":[181],"current":[183],"\"reasoning":[184],"ceiling\"":[185],"when":[186],"these":[187],"face":[189],"highly":[190],"recursive":[191],"3D":[192],"fractals":[193],"Menger":[196],"Sponge).":[197],"limitation":[199],"benchmarks":[200],"present":[202],"maturity":[203],"open-weight":[205],"architectures,":[206],"charting":[207],"viable":[209],"path":[210],"toward":[211],"fully":[212],"automated,":[213],"energy-efficient":[214],"optimization.":[217]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-15T00:00:00"}
