{"id":"https://openalex.org/W4407319894","doi":"https://doi.org/10.1145/3716873","title":"VersaTile: Flexible Tiled Architectures via Associative Processors","display_name":"VersaTile: Flexible Tiled Architectures via Associative Processors","publication_year":2025,"publication_date":"2025-02-10","ids":{"openalex":"https://openalex.org/W4407319894","doi":"https://doi.org/10.1145/3716873"},"language":"en","primary_location":{"id":"doi:10.1145/3716873","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716873","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3716873","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100435640","display_name":"Kailin Yang","orcid":"https://orcid.org/0000-0002-7762-2751"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kailin Yang","raw_affiliation_strings":["Computer Systems Laboratory, Cornell University","Electrical & Computer Engineering, Cornell University, Ithaca, United States"],"raw_orcid":"https://orcid.org/0000-0002-7762-2751","affiliations":[{"raw_affiliation_string":"Computer Systems Laboratory, Cornell University","institution_ids":["https://openalex.org/I205783295"]},{"raw_affiliation_string":"Electrical & Computer Engineering, Cornell University, Ithaca, United States","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083127721","display_name":"Jos\u00e9 F. Mart\u00ednez","orcid":"https://orcid.org/0000-0001-5451-5681"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 F. Mart\u00ednez","raw_affiliation_strings":["Computer Systems Laboratory, Cornell University","School of Electrical & Computer Engineering, Cornell University, Ithaca, United States"],"raw_orcid":"https://orcid.org/0000-0001-5451-5681","affiliations":[{"raw_affiliation_string":"Computer Systems Laboratory, Cornell University","institution_ids":["https://openalex.org/I205783295"]},{"raw_affiliation_string":"School of Electrical & Computer Engineering, Cornell University, Ithaca, United States","institution_ids":["https://openalex.org/I205783295"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I205783295"],"apc_list":null,"apc_paid":null,"fwci":1.0464,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.74782644,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"22","issue":"2","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12029","display_name":"DNA and Biological Computing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8272397518157959},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5776675343513489},{"id":"https://openalex.org/keywords/associative-property","display_name":"Associative property","score":0.5255916118621826},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5193270444869995}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8272397518157959},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5776675343513489},{"id":"https://openalex.org/C159423971","wikidata":"https://www.wikidata.org/wiki/Q177251","display_name":"Associative property","level":2,"score":0.5255916118621826},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5193270444869995},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3716873","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716873","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3716873","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716873","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1521947899","https://openalex.org/W1965061255","https://openalex.org/W1969349120","https://openalex.org/W2058490651","https://openalex.org/W2066851213","https://openalex.org/W2074084814","https://openalex.org/W2104644701","https://openalex.org/W2112547256","https://openalex.org/W2112980698","https://openalex.org/W2135601680","https://openalex.org/W2144481293","https://openalex.org/W2147657366","https://openalex.org/W2158996877","https://openalex.org/W2250636351","https://openalex.org/W2331783522","https://openalex.org/W2352050502","https://openalex.org/W2396622873","https://openalex.org/W2471077310","https://openalex.org/W2613264175","https://openalex.org/W2613569094","https://openalex.org/W2613989746","https://openalex.org/W2765234579","https://openalex.org/W2766489088","https://openalex.org/W2773750423","https://openalex.org/W2801000640","https://openalex.org/W2949989598","https://openalex.org/W2950138172","https://openalex.org/W3042944763","https://openalex.org/W3043089744","https://openalex.org/W3123542955","https://openalex.org/W3155004489","https://openalex.org/W3159441626","https://openalex.org/W3189166979","https://openalex.org/W4239722617","https://openalex.org/W4245124044","https://openalex.org/W4281732221","https://openalex.org/W4282041904","https://openalex.org/W4285121610","https://openalex.org/W4308083915","https://openalex.org/W4386763619","https://openalex.org/W4387064011","https://openalex.org/W4387064057","https://openalex.org/W4389584391","https://openalex.org/W4404954750"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"As":[0],"modern":[1],"applications":[2,206],"demand":[3],"more":[4],"data,":[5],"processing-in-memory":[6],"(PIM)":[7],"architectures":[8],"have":[9],"emerged":[10],"to":[11,118,179,188,195,202,223,235],"address":[12],"the":[13,54,75,96,167],"challenges":[14],"of":[15,45,98,129,148,231],"data":[16],"movement":[17],"and":[18,39,51,57,70,79,85,110,171,191,211,219],"parallelism.":[19],"In":[20],"this":[21],"article,":[22],"we":[23,89],"propose":[24],"VersaTile,":[25],"a":[26,43,81,93,102,126,133,138,145,174],"heterogeneous,":[27],"fully":[28],"CMOS-based":[29],"tiled":[30],"architecture":[31,121],"that":[32],"combines":[33],"conventional":[34],"out-of-order":[35],"(OoO)":[36],"superscalar":[37],"CPUs":[38,50,69],"associative":[40],"processors":[41],"(APs),":[42],"type":[44],"CAM-based":[46],"PIM":[47],"core.":[48],"Both":[49],"APs":[52,72,100,218],"leverage":[53],"RISC-V":[55],"ISA":[56],"its":[58],"standard":[59],"RVV":[60],"vector":[61,104],"extension.":[62],"VersaTile":[63,161,215],"fosters":[64],"collaboration":[65],"between":[66],"multiple":[67,99],"low-latency":[68],"high-throughput":[71],"by":[73],"sharing":[74],"same":[76],"software":[77],"stack":[78],"adopting":[80],"common":[82],"CPU":[83,182],"programming":[84,112],"compilation":[86],"frontend.":[87],"Moreover,":[88],"introduce":[90],"tile":[91],"stitching,":[92],"mechanism":[94],"enabling":[95],"aggregation":[97],"into":[101],"single":[103],"super-unit":[105],"with":[106,177,184,193,207],"modest":[107],"hardware":[108],"support":[109],"no":[111],"effort.":[113],"Tile":[114],"stitching":[115],"allows":[116],"us":[117],"configure":[119],"an":[120,196],"for":[122],"optimal":[123],"performance":[124,226],"across":[125,166],"wide":[127],"range":[128],"applications.":[130],"We":[131],"provide":[132],"detailed":[134],"case":[135],"study,":[136],"including":[137],"scalable":[139],"floorplan":[140],"example,":[141],"as":[142,144],"well":[143],"comprehensive":[146],"evaluation":[147],"various":[149],"design":[150],"points.":[151],"Our":[152],"experiments":[153],"show":[154],"that,":[155],"when":[156],"using":[157,228],"only":[158],"AP":[159,199],"tiles,":[160],"can":[162,216],"achieve,":[163],"on":[164],"average":[165],"Phoenix":[168],"benchmark":[169],"suite":[170],"3D":[172],"convolution,":[173],"\\(5.7\\times\\)":[175],"speedup":[176],"respect":[178,194],"area-equivalent":[180],"OoO":[181,220],"cores":[183,221],"SIMD":[185],"ALUs":[186],"(up":[187,201],"\\(23\\times\\)":[189],"),":[190],"\\(4.6\\times\\)":[192],"equivalent-sized":[197],"monolithic":[198],"baseline":[200],"\\(29\\times\\)":[203],").":[204],"For":[205],"both":[208],"DLP":[209],"(vector)":[210],"ILP":[212],"(scalar)":[213],"regions,":[214],"use":[217],"collaboratively":[222],"achieve":[224],"better":[225],"than":[227],"either":[229],"one":[230],"them":[232],"only,":[233],"up":[234],"\\(4.4\\times\\)":[236],".":[237]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
