{"id":"https://openalex.org/W4417534303","doi":"https://doi.org/10.1109/fccm68464.2026.00035","title":"AIE4ML: An End-to-End Framework for Compiling Neural Networks for the Next Generation of AMD AI Engines","display_name":"AIE4ML: An End-to-End Framework for Compiling Neural Networks for the Next Generation of AMD AI Engines","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W4417534303","doi":"https://doi.org/10.1109/fccm68464.2026.00035"},"language":"en","primary_location":{"id":"doi:10.1109/fccm68464.2026.00035","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00035","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.15946","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Dimitrios Danopoulos","orcid":null},"institutions":[{"id":"https://openalex.org/I67311998","display_name":"European Organization for Nuclear Research","ror":"https://ror.org/01ggx4157","country_code":"CH","type":"facility","lineage":["https://openalex.org/I67311998"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Dimitrios Danopoulos","raw_affiliation_strings":["European Organization for Nuclear Research (CERN),Geneva,Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"European Organization for Nuclear Research (CERN),Geneva,Switzerland","institution_ids":["https://openalex.org/I67311998"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Enrico Lupi","orcid":null},"institutions":[{"id":"https://openalex.org/I67311998","display_name":"European Organization for Nuclear Research","ror":"https://ror.org/01ggx4157","country_code":"CH","type":"facility","lineage":["https://openalex.org/I67311998"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Enrico Lupi","raw_affiliation_strings":["European Organization for Nuclear Research (CERN),Geneva,Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"European Organization for Nuclear Research (CERN),Geneva,Switzerland","institution_ids":["https://openalex.org/I67311998"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chang Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I67311998","display_name":"European Organization for Nuclear Research","ror":"https://ror.org/01ggx4157","country_code":"CH","type":"facility","lineage":["https://openalex.org/I67311998"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Chang Sun","raw_affiliation_strings":["European Organization for Nuclear Research (CERN),Geneva,Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"European Organization for Nuclear Research (CERN),Geneva,Switzerland","institution_ids":["https://openalex.org/I67311998"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sebastian Dittmeier","orcid":null},"institutions":[{"id":"https://openalex.org/I223822909","display_name":"Heidelberg University","ror":"https://ror.org/038t36y30","country_code":"DE","type":"education","lineage":["https://openalex.org/I223822909"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sebastian Dittmeier","raw_affiliation_strings":["Heidelberg University,Physikalisches Institut,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Heidelberg University,Physikalisches Institut,Germany","institution_ids":["https://openalex.org/I223822909"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Michael Kagan","orcid":null},"institutions":[{"id":"https://openalex.org/I2801935854","display_name":"SLAC National Accelerator Laboratory","ror":"https://ror.org/05gzmn429","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I2801935854","https://openalex.org/I39565521","https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Kagan","raw_affiliation_strings":["SLAC National Accelerator Laboratory,Menlo Park,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SLAC National Accelerator Laboratory,Menlo Park,CA,USA","institution_ids":["https://openalex.org/I2801935854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Vladimir Loncar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210149559","display_name":"Institute of Physics Belgrade","ror":"https://ror.org/04h3h5b09","country_code":"RS","type":"facility","lineage":["https://openalex.org/I4068193","https://openalex.org/I4210149559"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Vladimir Loncar","raw_affiliation_strings":["Institute of Physics Belgrade,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Physics Belgrade,Serbia","institution_ids":["https://openalex.org/I4210149559"]}]},{"author_position":"last","author":{"id":null,"display_name":"Maurizio Pierini","orcid":null},"institutions":[{"id":"https://openalex.org/I67311998","display_name":"European Organization for Nuclear Research","ror":"https://ror.org/01ggx4157","country_code":"CH","type":"facility","lineage":["https://openalex.org/I67311998"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Maurizio Pierini","raw_affiliation_strings":["European Organization for Nuclear Research (CERN),Geneva,Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"European Organization for Nuclear Research (CERN),Geneva,Switzerland","institution_ids":["https://openalex.org/I67311998"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0150486,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"176","last_page":"184"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5174000263214111,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5174000263214111,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.2669999897480011,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.04450000077486038,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/firmware","display_name":"Firmware","score":0.58160001039505},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5613999962806702},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4787999987602234},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4366999864578247},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.41449999809265137},{"id":"https://openalex.org/keywords/reboot","display_name":"Reboot","score":0.36010000109672546},{"id":"https://openalex.org/keywords/applications-of-artificial-intelligence","display_name":"Applications of artificial intelligence","score":0.35839998722076416},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.35190001130104065},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3352000117301941}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8543000221252441},{"id":"https://openalex.org/C67212190","wikidata":"https://www.wikidata.org/wiki/Q104851","display_name":"Firmware","level":2,"score":0.58160001039505},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5613999962806702},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4787999987602234},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4366999864578247},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.41449999809265137},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.39660000801086426},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3953999876976013},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.36579999327659607},{"id":"https://openalex.org/C120524526","wikidata":"https://www.wikidata.org/wiki/Q1709148","display_name":"Reboot","level":2,"score":0.36010000109672546},{"id":"https://openalex.org/C157170001","wikidata":"https://www.wikidata.org/wiki/Q4781507","display_name":"Applications of artificial intelligence","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3352000117301941},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.32679998874664307},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.32330000400543213},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.31139999628067017},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3005000054836273},{"id":"https://openalex.org/C16311509","wikidata":"https://www.wikidata.org/wiki/Q4148050","display_name":"Dependency graph","level":3,"score":0.29670000076293945},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2928999960422516},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.2768000066280365},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C28034677","wikidata":"https://www.wikidata.org/wiki/Q17092530","display_name":"Interleaving","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2635999917984009},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/fccm68464.2026.00035","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fccm68464.2026.00035","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2512.15946","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.15946","pdf_url":"https://arxiv.org/pdf/2512.15946","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:cds.cern.ch:2953485","is_oa":false,"landing_page_url":"http://cds.cern.ch/record/2953485","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"doi:10.48550/arxiv.2512.15946","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.15946","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.15946","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.15946","pdf_url":"https://arxiv.org/pdf/2512.15946","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320313559","display_name":"CERN","ror":"https://ror.org/01ggx4157"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Efficient":[0],"AI":[1,6,53],"inference":[2],"on":[3,26],"AMD\u2019s":[4],"Versal":[5],"Engine":[7],"(AIE)":[8],"is":[9],"challenging":[10],"due":[11],"to":[12,81,111,161,203,207],"tightly":[13],"coupled":[14],"VLIW":[15],"execution,":[16],"explicit":[17],"datapaths,":[18],"and":[19,88,105,126,138,157,174],"local":[20],"memory":[21,109],"management.":[22],"Prior":[23],"work":[24],"focused":[25],"first-generation":[27],"AIE":[28,215],"kernel":[29],"optimizations,":[30],"without":[31],"tackling":[32],"full":[33],"neural":[34],"network":[35],"execution":[36],"across":[37,100,228],"the":[38,47,60,69,74,82,86,101,116,146,162,167,178,208,219],"2D":[39,102,164],"array.":[40],"In":[41,196],"this":[42],"work,":[43],"we":[44,77,91,122,200,232],"present":[45],"AIE4ML,":[46],"first":[48],"comprehensive":[49],"framework":[50,144,179],"for":[51,68,134,248],"converting":[52],"models":[54,183],"automatically":[55],"into":[56],"optimized":[57],"firmware":[58],"targeting":[59],"AIE-ML":[61,103],"generation":[62,147],"devices,":[63],"also":[64],"with":[65,131,221],"forward":[66],"compatibility":[67],"newer":[70],"AIE-MLv2":[71],"architecture.":[72],"At":[73,85],"single-kernel":[75,209],"level,":[76],"attain":[78],"performance":[79],"close":[80],"architectural":[83],"peak.":[84],"graph":[87,172],"system":[89],"levels,":[90],"provide":[92],"a":[93,120,124,170,245],"structured":[94],"parallelization":[95],"method":[96],"that":[97,234],"can":[98],"scale":[99],"fabric":[104],"exploit":[106],"its":[107],"dedicated":[108],"tiles":[110,216],"stay":[112],"entirely":[113,222],"on-chip":[114,223],"throughout":[115],"model":[117,230],"execution.":[118],"As":[119],"demonstration,":[121],"designed":[123],"generalized":[125],"highly":[127],"efficient":[128],"linear-layer":[129],"implementation":[130],"intrinsic":[132],"support":[133],"fused":[135],"bias":[136],"addition":[137],"ReLU":[139],"activation.":[140],"Also,":[141],"as":[142,189,252],"our":[143,151],"necessitates":[145],"of":[148,166,213,218],"multi-layer":[149],"implementations,":[150],"approach":[152],"systematically":[153],"derives":[154],"deterministic,":[155],"compact,":[156],"topology-optimized":[158],"placements":[159],"tailored":[160],"physical":[163],"grid":[165],"device":[168,220],"through":[169],"novel":[171],"placement":[173],"search":[175],"algorithm.":[176],"Finally,":[177],"seamlessly":[180],"accepts":[181],"quantized":[182],"imported":[184],"from":[185],"high-level":[186],"tools":[187],"such":[188,251],"hls4ml":[190],"or":[191],"PyTorch":[192],"while":[193],"preserving":[194],"bit-exactness.":[195],"layer":[197],"scaling":[198],"benchmarks,":[199],"achieve":[201],"up":[202],"98.6%":[204],"efficiency":[205],"relative":[206],"baseline,":[210],"utilizing":[211],"296":[212],"304":[214],"(97.4%)":[217],"data":[224],"movement.":[225],"With":[226],"evaluations":[227],"real-world":[229],"topologies,":[231],"demonstrate":[233],"AIE4ML":[235],"delivers":[236],"GPU-class":[237],"throughput":[238],"under":[239],"microsecond":[240],"latency":[241],"constraints,":[242],"making":[243],"it":[244],"practical":[246],"companion":[247],"ultra-low-latency":[249],"environments":[250],"trigger":[253],"systems":[254],"in":[255],"particle":[256],"physics":[257],"experiments.":[258]},"counts_by_year":[],"updated_date":"2026-06-12T06:20:11.936012","created_date":"2025-12-21T00:00:00"}
