{"id":"https://openalex.org/W7133557195","doi":"https://doi.org/10.48550/arxiv.2603.02787","title":"Rethinking Code Similarity for Automated Algorithm Design with LLMs","display_name":"Rethinking Code Similarity for Automated Algorithm Design with LLMs","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133557195","doi":"https://doi.org/10.48550/arxiv.2603.02787"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.02787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.02787","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128068918","display_name":"Rui Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128047114","display_name":"Zhichao Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Zhichao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5128068918"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.2289000004529953,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.2289000004529953,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.13269999623298645,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08749999850988388,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5223000049591064},{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.5019999742507935},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.49889999628067017},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4862000048160553},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.4781000018119812},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4523000121116638},{"id":"https://openalex.org/keywords/dynamic-time-warping","display_name":"Dynamic time warping","score":0.435699999332428},{"id":"https://openalex.org/keywords/algorithm-design","display_name":"Algorithm design","score":0.4311999976634979},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.3465000092983246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7429999709129333},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.623199999332428},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5223000049591064},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.5019999742507935},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.49889999628067017},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4862000048160553},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.4781000018119812},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4523000121116638},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.435699999332428},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.4311999976634979},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.391400009393692},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.3465000092983246},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34630000591278076},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.29820001125335693},{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C114408938","wikidata":"https://www.wikidata.org/wiki/Q333373","display_name":"Abstract syntax","level":3,"score":0.289000004529953},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28760001063346863},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2815999984741211},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.27959999442100525},{"id":"https://openalex.org/C2988012377","wikidata":"https://www.wikidata.org/wiki/Q29966452","display_name":"Functional equivalence","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.2603999972343445},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C58646249","wikidata":"https://www.wikidata.org/wiki/Q127380","display_name":"Abstract syntax tree","level":3,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.02787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.02787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"rise":[1],"of":[2,19,103,109,184,193,199],"Large":[3],"Language":[4],"Model-based":[5],"Automated":[6],"Algorithm":[7,173],"Design":[8],"(LLM-AAD)":[9],"has":[10],"transformed":[11],"algorithm":[12,25,37],"development":[13],"by":[14,179],"autonomously":[15],"generating":[16],"code":[17,65,198],"implementations":[18],"expert-level":[20],"algorithms.":[21,195],"Unlike":[22],"traditional":[23],"expert-driven":[24],"development,":[26],"in":[27,42,146],"the":[28,31,43,86,101,122,190],"LLM-AAD":[29,157],"paradigm,":[30],"main":[32],"design":[33],"principle":[34],"behind":[35],"an":[36],"is":[38],"often":[39],"implicitly":[40],"embedded":[41],"generated":[44,177],"code.":[45],"Therefore,":[46],"assessing":[47],"algorithmic":[48,55,73,88,98],"similarity":[49,66,99],"directly":[50],"from":[51,57],"code,":[52],"distinguishing":[53],"genuine":[54],"innovation":[56],"mere":[58],"syntactic":[59,138],"variation,":[60],"becomes":[61],"essential.":[62],"While":[63],"various":[64],"metrics":[67],"exist,":[68],"they":[69,76],"fail":[70],"to":[71,96],"capture":[72],"similarity,":[74],"as":[75,106,116],"focus":[77],"on":[78,168],"surface-level":[79],"syntax":[80],"or":[81,139],"output":[82],"equivalence":[83],"rather":[84],"than":[85],"underlying":[87],"logic.":[89],"We":[90,142],"propose":[91],"BehaveSim,":[92],"a":[93,107],"novel":[94],"method":[95],"measure":[97],"through":[100],"lens":[102],"problem-solving":[104,117,185],"behavior":[105],"sequence":[108],"intermediate":[110],"solutions":[111],"produced":[112],"during":[113],"execution,":[114],"dubbed":[115],"trajectories":[118],"(PSTrajs).":[119],"By":[120],"quantifying":[121],"alignment":[123],"between":[124],"PSTrajs":[125],"using":[126],"dynamic":[127],"time":[128],"warping":[129],"(DTW),":[130],"BehaveSim":[131,154,175],"distinguishes":[132],"algorithms":[133,178],"with":[134],"divergent":[135],"logic":[136],"despite":[137],"output-level":[140],"similarities.":[141],"demonstrate":[143],"its":[144],"utility":[145],"two":[147],"key":[148],"applications:":[149],"(i)":[150],"Enhancing":[151],"LLM-AAD:":[152],"Integrating":[153],"into":[155],"existing":[156],"frameworks":[158],"(e.g.,":[159],"FunSearch,":[160],"EoH)":[161],"promotes":[162],"behavioral":[163],"diversity,":[164],"significantly":[165],"improving":[166],"performance":[167],"three":[169],"AAD":[170],"tasks.":[171],"(ii)":[172],"analysis:":[174],"clusters":[176],"behavior,":[180],"enabling":[181],"systematic":[182],"analysis":[183],"strategies--a":[186],"crucial":[187],"tool":[188],"for":[189],"growing":[191],"ecosystem":[192],"AI-generated":[194],"Data":[196],"and":[197],"this":[200],"work":[201],"are":[202],"open-sourced":[203],"at":[204],"https://github.com/RayZhhh/behavesim.":[205]},"counts_by_year":[],"updated_date":"2026-03-05T07:36:02.291473","created_date":"2026-03-05T00:00:00"}
