{"id":"https://openalex.org/W7154747635","doi":"https://doi.org/10.48550/arxiv.2604.14700","title":"Accelerating CRONet on AMD Versal AIE-ML Engines","display_name":"Accelerating CRONet on AMD Versal AIE-ML Engines","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154747635","doi":"https://doi.org/10.48550/arxiv.2604.14700"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.14700","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14700","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.14700","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133853383","display_name":"Kaustubh Mhatre","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mhatre, Kaustubh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105012335","display_name":"Vedant Tewari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tewari, Vedant","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133910440","display_name":"Aditya Ray","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ray, Aditya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133560892","display_name":"Dr. Farhan Khan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khan, Farhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092952141","display_name":"Ridwan Olabiyi","orcid":"https://orcid.org/0009-0003-4720-2304"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olabiyi, Ridwan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133916002","display_name":"Ashif Iquebal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iquebal, Ashif","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133900510","display_name":"Aman Arora","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arora, Aman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11115","display_name":"Topology Optimization in Engineering","score":0.781499981880188,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11115","display_name":"Topology Optimization in Engineering","score":0.781499981880188,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10100","display_name":"Metaheuristic Optimization Algorithms Research","score":0.039900001138448715,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.0333000011742115,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5598999857902527},{"id":"https://openalex.org/keywords/network-topology","display_name":"Network topology","score":0.5503000020980835},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5432999730110168},{"id":"https://openalex.org/keywords/topology","display_name":"Topology (electrical circuits)","score":0.5187000036239624},{"id":"https://openalex.org/keywords/topology-optimization","display_name":"Topology optimization","score":0.47909998893737793},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.44620001316070557},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.44359999895095825},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.35260000824928284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7671999931335449},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5598999857902527},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.5503000020980835},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5432999730110168},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.5187000036239624},{"id":"https://openalex.org/C189216461","wikidata":"https://www.wikidata.org/wiki/Q2443456","display_name":"Topology optimization","level":3,"score":0.47909998893737793},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.45089998841285706},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.44620001316070557},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.44359999895095825},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.35260000824928284},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3508000075817108},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.32100000977516174},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3068999946117401},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.3068999946117401},{"id":"https://openalex.org/C135628077","wikidata":"https://www.wikidata.org/wiki/Q220184","display_name":"Finite element method","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3050000071525574},{"id":"https://openalex.org/C123745756","wikidata":"https://www.wikidata.org/wiki/Q1665949","display_name":"Interconnection","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.28209999203681946},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2565999925136566},{"id":"https://openalex.org/C193415008","wikidata":"https://www.wikidata.org/wiki/Q639681","display_name":"Network architecture","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.14700","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14700","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.14700","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14700","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5336927771568298,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Topology":[0],"optimization":[1,50,55,120],"is":[2,51],"a":[3,14,62,113,118],"computational":[4],"method":[5],"used":[6],"to":[7,19,78,144,157,197,204,211],"determine":[8],"the":[9,125,136,146,155,166,217,231],"optimal":[10],"material":[11],"distribution":[12],"within":[13],"prescribed":[15],"design":[16],"domain,":[17],"aiming":[18],"minimize":[20],"structural":[21,35],"weight":[22],"while":[23,84],"satisfying":[24],"load":[25],"and":[26,40,93,103,138,173,202],"boundary":[27],"conditions.":[28],"For":[29],"critical":[30],"infrastructure":[31],"applications,":[32],"such":[33],"as":[34],"health":[36],"monitoring":[37],"of":[38,117,141,148,233],"bridges":[39],"buildings,":[41],"particularly":[42],"in":[43,68,100,200,207,216],"digital":[44],"twin":[45],"contexts,":[46],"low-latency":[47,240],"energy-efficient":[48,241],"topology":[49,54,119,242],"essential.":[52],"Traditionally,":[53],"relies":[56],"on":[57,96,124,165,183],"finite":[58],"element":[59],"analysis":[60],"(FEA),":[61],"computationally":[63],"intensive":[64],"process.":[65],"Recent":[66],"advances":[67],"deep":[69],"neural":[70,121,150,161],"networks":[71],"(DNNs)":[72],"have":[73,90],"introduced":[74],"data":[75,187],"driven":[76],"alternatives":[77],"FEA,":[79],"substantially":[80],"reducing":[81],"computation":[82],"time":[83],"maintaining":[85],"solution":[86],"quality.":[87],"These":[88,228],"DNNs":[89],"complex":[91],"architectures":[92],"implementing":[94],"them":[95],"inference-class":[97,213],"GPUs":[98],"results":[99,190,229],"high":[101],"latency":[102,201],"poor":[104],"energy":[105,208],"efficiency.":[106],"To":[107],"address":[108],"this":[109],"challenge,":[110],"we":[111],"present":[112],"hardware":[114],"accelerated":[115],"implementation":[116,194],"network":[122,151,162,174],"(CRONet)":[123],"AMD":[126],"Versal":[127,234],"AI":[128],"Engine-ML":[129],"(AIE-ML)":[130],"architecture.":[131],"Our":[132],"approach":[133],"efficiently":[134],"exploits":[135],"parallelism":[137],"memory":[139],"hierarchy":[140],"AIE-ML":[142,167,235],"engines":[143],"optimize":[145],"execution":[147],"various":[149],"operators.":[152],"We":[153],"are":[154],"first":[156],"implement":[158],"an":[159,212],"end-to-end":[160],"fully":[163],"realized":[164],"array,":[168],"where":[169],"all":[170],"intermediate":[171,186],"activations":[172],"weights":[175],"reside":[176],"on-chip":[177],"throughout":[178],"inference,":[179],"eliminating":[180],"any":[181],"reliance":[182],"DRAM":[184],"for":[185,225,238],"movement.":[188],"Experimental":[189],"demonstrate":[191],"that":[192],"our":[193],"achieves":[195],"up":[196,203],"2.49x":[198],"improvement":[199,206],"4.18x":[205],"efficiency":[209],"compared":[210],"ML-optimized":[214],"GPU":[215],"same":[218],"power":[219],"budget":[220],"(Nvidia":[221],"T4)":[222],"after":[223],"scaling":[224],"technology":[226],"node.":[227],"highlight":[230],"potential":[232],"based":[236],"acceleration":[237],"enabling":[239],"optimization.":[243]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-18T00:00:00"}
