{"id":"https://openalex.org/W4409585609","doi":"https://doi.org/10.1145/3730582","title":"DCMA: Accelerating Parallel DMA Transfers with a Multi-Port Direct Cached Memory Access in a Massive-Parallel Vector Processor","display_name":"DCMA: Accelerating Parallel DMA Transfers with a Multi-Port Direct Cached Memory Access in a Massive-Parallel Vector Processor","publication_year":2025,"publication_date":"2025-04-18","ids":{"openalex":"https://openalex.org/W4409585609","doi":"https://doi.org/10.1145/3730582"},"language":"en","primary_location":{"id":"doi:10.1145/3730582","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3730582","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3730582","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039886741","display_name":"Gia Bao Thieu","orcid":"https://orcid.org/0000-0002-4045-3771"},"institutions":[{"id":"https://openalex.org/I94509681","display_name":"Technische Universit\u00e4t Braunschweig","ror":"https://ror.org/010nsgg66","country_code":"DE","type":"education","lineage":["https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Gia Bao Thieu","raw_affiliation_strings":["Chair for Chip Design for Embedded Computing, TU Braunschweig","Chair for Chip Design for Embedded Computing, TU Braunschweig, Braunschweig, Germany"],"raw_orcid":"https://orcid.org/0000-0002-4045-3771","affiliations":[{"raw_affiliation_string":"Chair for Chip Design for Embedded Computing, TU Braunschweig","institution_ids":["https://openalex.org/I94509681"]},{"raw_affiliation_string":"Chair for Chip Design for Embedded Computing, TU Braunschweig, Braunschweig, Germany","institution_ids":["https://openalex.org/I94509681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000412239","display_name":"Sven Gesper","orcid":"https://orcid.org/0000-0002-3570-1638"},"institutions":[{"id":"https://openalex.org/I94509681","display_name":"Technische Universit\u00e4t Braunschweig","ror":"https://ror.org/010nsgg66","country_code":"DE","type":"education","lineage":["https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sven Gesper","raw_affiliation_strings":["Chair for Chip Design for Embedded Computing, TU Braunschweig","Chair for Chip Design for Embedded Computing, TU Braunschweig, Braunschweig, Germany"],"raw_orcid":"https://orcid.org/0000-0002-3570-1638","affiliations":[{"raw_affiliation_string":"Chair for Chip Design for Embedded Computing, TU Braunschweig","institution_ids":["https://openalex.org/I94509681"]},{"raw_affiliation_string":"Chair for Chip Design for Embedded Computing, TU Braunschweig, Braunschweig, Germany","institution_ids":["https://openalex.org/I94509681"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038123706","display_name":"Guillermo Pay\u00e1\u2013Vay\u00e1","orcid":"https://orcid.org/0000-0003-3503-8386"},"institutions":[{"id":"https://openalex.org/I94509681","display_name":"Technische Universit\u00e4t Braunschweig","ror":"https://ror.org/010nsgg66","country_code":"DE","type":"education","lineage":["https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Guillermo Pay\u00e1-Vay\u00e1","raw_affiliation_strings":["Chair for Chip Design for Embedded Computing, TU Braunschweig","Chair for Chip Design for Embedded Computing, TU Braunschweig, Braunschweig, Germany"],"raw_orcid":"https://orcid.org/0000-0003-3503-8386","affiliations":[{"raw_affiliation_string":"Chair for Chip Design for Embedded Computing, TU Braunschweig","institution_ids":["https://openalex.org/I94509681"]},{"raw_affiliation_string":"Chair for Chip Design for Embedded Computing, TU Braunschweig, Braunschweig, Germany","institution_ids":["https://openalex.org/I94509681"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5039886741"],"corresponding_institution_ids":["https://openalex.org/I94509681"],"apc_list":null,"apc_paid":null,"fwci":2.1546,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83640103,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"22","issue":"2","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8510454893112183},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7464751601219177},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5887635350227356},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.4594676196575165},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.369259774684906},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3585967421531677}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8510454893112183},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7464751601219177},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5887635350227356},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.4594676196575165},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.369259774684906},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3585967421531677}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3730582","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3730582","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3730582","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3730582","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1791189328","https://openalex.org/W1978111236","https://openalex.org/W1999085092","https://openalex.org/W2034628162","https://openalex.org/W2087329614","https://openalex.org/W2087539664","https://openalex.org/W2098361319","https://openalex.org/W2120114427","https://openalex.org/W2137378081","https://openalex.org/W2163687928","https://openalex.org/W2181961178","https://openalex.org/W2194775991","https://openalex.org/W2289252105","https://openalex.org/W2540279855","https://openalex.org/W2606722458","https://openalex.org/W2761557516","https://openalex.org/W2796438033","https://openalex.org/W2888727064","https://openalex.org/W2945146780","https://openalex.org/W2949650786","https://openalex.org/W2950775702","https://openalex.org/W2962835968","https://openalex.org/W2962970995","https://openalex.org/W2963163009","https://openalex.org/W3024438256","https://openalex.org/W3036878841","https://openalex.org/W3155922894","https://openalex.org/W3190062760","https://openalex.org/W4206336135","https://openalex.org/W4230326142","https://openalex.org/W4379116059"],"related_works":["https://openalex.org/W2133682266","https://openalex.org/W2497617944","https://openalex.org/W2167303720","https://openalex.org/W1563139915","https://openalex.org/W2109715593","https://openalex.org/W2061075966","https://openalex.org/W3147501184","https://openalex.org/W2268996566","https://openalex.org/W4256652509","https://openalex.org/W2140219379"],"abstract_inverted_index":{"State-of-the-art":[0],"applications,":[1],"such":[2,24],"as":[3],"convolutional":[4],"neural":[5],"networks,":[6],"demand":[7],"specialized":[8],"hardware":[9,25],"accelerators":[10,44],"that":[11],"address":[12],"performance":[13],"and":[14,87,118,121,153],"efficiency":[15],"constraints.":[16],"An":[17],"efficient":[18,150],"memory":[19,29,74,135,155,201],"hierarchy":[20],"is":[21,172],"mandatory":[22],"for":[23,126,149,159,213],"systems.":[26],"While":[27],"the":[28,48,56,73,82,130,160,195,198,205,214],"architectures":[30],"of":[31,59,76,197,209],"general-purpose":[32],"processors":[33],"(e.g.,":[34],"CPU":[35],"or":[36,68],"GPUs)":[37],"are":[38,162],"based":[39],"on":[40,174],"cache":[41,95,119],"systems,":[42,96],"dedicated":[43],"have":[45],"mostly":[46],"adopted":[47],"DMA":[49,62,80,117,141],"(Direct":[50],"Memory":[51,111],"Access)":[52],"concept":[53],"due":[54],"to":[55,84,211],"application":[57],"field":[58],"image":[60,77],"processing.":[61,78],"features":[63],"like":[64],"2D":[65],"data":[66,69,89,151],"transfers":[67],"padding":[70],"can":[71,217],"optimize":[72],"accesses":[75],"However,":[79],"lacks":[81],"capability":[83],"exploit":[85],"temporal":[86],"spatial":[88],"reuse,":[90],"a":[91,107,165,175,183],"feature":[92],"common":[93],"in":[94,102],"particularly":[97],"when":[98],"multiple":[99],"DMAs":[100],"operate":[101],"parallel.":[103],"This":[104,146],"article":[105],"proposes":[106],"novel":[108],"Direct":[109],"Cached":[110],"Access":[112],"(DCMA)":[113],"architecture,":[114],"combining":[115],"both":[116],"methodologies":[120],"their":[122],"respective":[123],"advantages.":[124],"Optimized":[125],"image-based":[127],"AI":[128],"algorithms,":[129],"DCMA":[131,161,171],"architecture":[132],"facilitates":[133],"enhanced":[134],"access":[136],"by":[137],"integrating":[138],"multiple,":[139],"parallel":[140,154],"ports":[142],"with":[143,182],"caching":[144],"capabilities.":[145],"design":[147,167],"allows":[148],"reuse":[152],"access.":[156],"Optimal":[157],"parameters":[158],"determined":[163],"through":[164],"comprehensive":[166],"space":[168],"exploration.":[169],"The":[170,192],"evaluated":[173],"state-of-the-art":[176],"Xilinx":[177],"UltraScale+":[178],"FPGA":[179],"board":[180],"coupled":[181],"massive-parallel":[184],"vertical":[185],"vector":[186,199],"co-processor,":[187],"called":[188],"V":[189],"2":[190],"PRO.":[191],"results":[193],"show":[194],"mitigation":[196],"processor\u2019s":[200],"bottleneck.":[202],"By":[203],"using":[204],"proposed":[206],"DCMA,":[207],"speedups":[208],"up":[210],"\u00d717":[212],"ResNet-50":[215],"CNN":[216],"be":[218],"achieved.":[219]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
