{"id":"https://openalex.org/W4391670462","doi":"https://doi.org/10.48550/arxiv.2402.05110","title":"Opening the AI black box: program synthesis via mechanistic interpretability","display_name":"Opening the AI black box: program synthesis via mechanistic interpretability","publication_year":2024,"publication_date":"2024-02-07","ids":{"openalex":"https://openalex.org/W4391670462","doi":"https://doi.org/10.48550/arxiv.2402.05110"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2402.05110","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.05110","pdf_url":"https://arxiv.org/pdf/2402.05110","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2402.05110","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071048251","display_name":"Eric J. Michaud","orcid":"https://orcid.org/0000-0001-7912-1953"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Michaud, Eric J.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108520183","display_name":"Isaac Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liao, Isaac","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015527945","display_name":"Vedang Lad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lad, Vedang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101924541","display_name":"Ziming Liu","orcid":"https://orcid.org/0000-0002-7090-8853"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023335993","display_name":"Anish Mudide","orcid":"https://orcid.org/0000-0002-6174-2345"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mudide, Anish","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038730512","display_name":"Chloe Loughridge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Loughridge, Chloe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089640822","display_name":"Zifan Carl Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Zifan Carl","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093775237","display_name":"Tara Rezaei Kheirkhah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kheirkhah, Tara Rezaei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104241454","display_name":"Mateja Vukeli\u0107","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vukeli\u0107, Mateja","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5091601455","display_name":"Max Tegmark","orcid":"https://orcid.org/0000-0001-7670-7190"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tegmark, Max","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5071048251"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12002","display_name":"Computability, Logic, AI Algorithms","score":0.9291999936103821,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.9556446075439453},{"id":"https://openalex.org/keywords/black-box","display_name":"Black box","score":0.7450317144393921},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4606615900993347},{"id":"https://openalex.org/keywords/box-model","display_name":"Box model","score":0.43028366565704346},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3941973149776459},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1058824360370636}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.9556446075439453},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.7450317144393921},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4606615900993347},{"id":"https://openalex.org/C2992995325","wikidata":"https://www.wikidata.org/wiki/Q4951592","display_name":"Box model","level":2,"score":0.43028366565704346},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3941973149776459},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1058824360370636},{"id":"https://openalex.org/C91586092","wikidata":"https://www.wikidata.org/wiki/Q757520","display_name":"Atmospheric sciences","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2402.05110","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.05110","pdf_url":"https://arxiv.org/pdf/2402.05110","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2402.05110","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2402.05110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2402.05110","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.05110","pdf_url":"https://arxiv.org/pdf/2402.05110","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2004883221","display_name":null,"funder_award_id":"PHY-2019786","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3509299210","display_name":null,"funder_award_id":"2019786","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G539115378","display_name":null,"funder_award_id":"2141064","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391670462.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2797441709","https://openalex.org/W2943982549","https://openalex.org/W2886918272","https://openalex.org/W4387589990","https://openalex.org/W4297660007","https://openalex.org/W2346578521","https://openalex.org/W2910028250","https://openalex.org/W4241566321","https://openalex.org/W3101055019","https://openalex.org/W3094353829"],"abstract_inverted_index":{"We":[0,30,127],"present":[1],"MIPS,":[2],"a":[3,34,81],"novel":[4],"method":[5],"for":[6,132],"program":[7,104],"synthesis":[8,105],"based":[9],"on":[10,33],"automated":[11],"mechanistic":[12],"interpretability":[13],"of":[14,36,57,110],"neural":[15],"networks":[16],"trained":[17],"to":[18,52,76,92,99,137],"perform":[19],"the":[20,24,78,94],"desired":[21],"task,":[22],"auto-distilling":[23],"learned":[25,43,95],"algorithm":[26],"into":[27,80],"Python":[28],"code.":[29],"test":[31],"MIPS":[32,54,71],"benchmark":[35],"62":[37],"algorithmic":[38],"tasks":[39],"that":[40,61],"can":[41],"be":[42],"by":[44,65],"an":[45,73],"RNN":[46,79],"and":[47,123,130,143],"find":[48],"it":[49],"highly":[50],"complementary":[51],"GPT-4:":[53],"solves":[55,69],"32":[56],"them,":[58],"including":[59],"13":[60],"are":[62],"not":[63,114],"solved":[64],"GPT-4":[66],"(which":[67],"also":[68],"30).":[70],"uses":[72],"integer":[74,89],"autoencoder":[75],"convert":[77],"finite":[82],"state":[83],"machine,":[84],"then":[85],"applies":[86],"Boolean":[87],"or":[88],"symbolic":[90],"regression":[91],"capture":[93],"algorithm.":[96],"As":[97],"opposed":[98],"large":[100],"language":[101],"models,":[102],"this":[103,135],"technique":[106],"makes":[107],"no":[108],"use":[109],"(and":[111],"is":[112],"therefore":[113],"limited":[115],"by)":[116],"human":[117],"training":[118],"data":[119],"such":[120],"as":[121],"algorithms":[122],"code":[124],"from":[125],"GitHub.":[126],"discuss":[128],"opportunities":[129],"challenges":[131],"scaling":[133],"up":[134],"approach":[136],"make":[138],"machine-learned":[139],"models":[140],"more":[141],"interpretable":[142],"trustworthy.":[144]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
