{"id":"https://openalex.org/W7161719558","doi":"https://doi.org/10.48550/arxiv.2605.17978","title":"AutoVecCoder: Teaching LLMs to Generate Explicitly Vectorized Code","display_name":"AutoVecCoder: Teaching LLMs to Generate Explicitly Vectorized Code","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161719558","doi":"https://doi.org/10.48550/arxiv.2605.17978"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17978","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17978","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17978","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136465626","display_name":"Shangzhan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Shangzhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136504832","display_name":"Xinyu Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Xinyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136493379","display_name":"Xuanyu Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Xuanyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136501817","display_name":"Ye He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Ye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136485883","display_name":"Yuxin Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yuxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136504447","display_name":"Yuxuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yuxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136473240","display_name":"Xu Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136466235","display_name":"Wanxiang Che","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Che, Wanxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136471484","display_name":"Qi Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Qi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136460696","display_name":"Ting Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136473681","display_name":"Maosong Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Maosong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5547000169754028,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5547000169754028,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.1005999967455864,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.07259999960660934,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.944599986076355},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.746399998664856},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7001000046730042},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6496999859809875},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5224000215530396},{"id":"https://openalex.org/keywords/cornerstone","display_name":"Cornerstone","score":0.42489999532699585},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.41819998621940613}],"concepts":[{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.944599986076355},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8105000257492065},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.746399998664856},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7001000046730042},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6496999859809875},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5831000208854675},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5224000215530396},{"id":"https://openalex.org/C2780616401","wikidata":"https://www.wikidata.org/wiki/Q1133673","display_name":"Cornerstone","level":2,"score":0.42489999532699585},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.41819998621940613},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.41749998927116394},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.3767000138759613},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.35740000009536743},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3391000032424927},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29600000381469727},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.28619998693466187},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C2988963302","wikidata":"https://www.wikidata.org/wiki/Q629206","display_name":"Program code","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C19024347","wikidata":"https://www.wikidata.org/wiki/Q211496","display_name":"High-level programming language","level":3,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17978","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17978","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17978","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17978","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vectorization":[0],"via":[1],"Single":[2],"Instruction,":[3],"Multiple":[4],"Data":[5],"(SIMD)":[6],"architectures":[7],"is":[8],"a":[9,79,111],"cornerstone":[10],"of":[11,61,69,89,136,153],"high-performance":[12],"computing.":[13],"To":[14],"fully":[15],"exploit":[16],"hardware":[17,71],"potential,":[18],"developers":[19],"often":[20],"resort":[21],"to":[22,35,58,83,104],"explicit":[23,55,91],"vectorization":[24,56],"using":[25],"intrinsics,":[26],"as":[27],"compiler-based":[28],"auto-vectorization":[29],"frequently":[30],"yields":[31],"suboptimal":[32],"results":[33],"due":[34,57],"conservative":[36],"static":[37],"analysis.":[38],"While":[39],"Large":[40],"Language":[41],"Models":[42],"(LLMs)":[43],"have":[44],"demonstrated":[45],"remarkable":[46],"proficiency":[47],"in":[48,139],"general":[49],"code":[50,117],"generation,":[51],"they":[52],"struggle":[53],"with":[54,86,119],"the":[59,65,87,131,150],"scarcity":[60],"high-quality":[62],"corpora":[63],"and":[64,109,133],"strict":[66],"semantic":[67],"constraints":[68],"low-level":[70],"instructions.":[72],"In":[73],"this":[74,125],"paper,":[75],"we":[76],"propose":[77],"AutoVecCoder,":[78],"novel":[80],"framework":[81,114,126],"designed":[82],"empower":[84],"LLMs":[85],"capability":[88],"automated":[90,100,155],"vectorization.":[92,156],"AutoVecCoder":[93],"integrates":[94],"two":[95],"core":[96],"components:":[97],"VecPrompt,":[98],"an":[99],"data":[101],"synthesis":[102],"pipeline":[103],"inject":[105],"domain-specific":[106],"intrinsic":[107],"knowledge;":[108],"VecRL,":[110],"reinforcement":[112],"learning":[113],"that":[115],"aligns":[116],"generation":[118],"execution":[120],"efficiency.":[121],"AutoVecCoder-8B":[122],"trained":[123],"by":[124],"achieves":[127],"state-of-the-art":[128],"performance":[129],"on":[130],"SSE":[132],"AVX":[134],"subsets":[135],"SimdBench":[137],"and,":[138],"some":[140],"cases,":[141],"generates":[142],"implementations":[143],"surpassing":[144],"standard":[145],"-O3":[146],"optimizations,":[147],"effectively":[148],"overcoming":[149],"inherent":[151],"bottlenecks":[152],"traditional":[154]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
