{"id":"https://openalex.org/W2092795222","doi":"https://doi.org/10.1145/1463768.1463781","title":"An efficient in-place 3D transpose for multicore processors with software managed memory hierarchy","display_name":"An efficient in-place 3D transpose for multicore processors with software managed memory hierarchy","publication_year":2008,"publication_date":"2008-11-24","ids":{"openalex":"https://openalex.org/W2092795222","doi":"https://doi.org/10.1145/1463768.1463781","mag":"2092795222"},"language":"en","primary_location":{"id":"doi:10.1145/1463768.1463781","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1463768.1463781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 1st international forum on Next-generation multicore/manycore technologies","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087689650","display_name":"Ali A. El\u2010Moursy","orcid":"https://orcid.org/0000-0002-3660-6544"},"institutions":[{"id":"https://openalex.org/I4210156128","display_name":"Electronics Research Institute","ror":"https://ror.org/0532wcf75","country_code":"EG","type":"facility","lineage":["https://openalex.org/I4210094263","https://openalex.org/I4210156128"]}],"countries":["EG"],"is_corresponding":true,"raw_author_name":"Ali El-Moursy","raw_affiliation_strings":["Electronics Research Institute, Giza, Egypt"],"affiliations":[{"raw_affiliation_string":"Electronics Research Institute, Giza, Egypt","institution_ids":["https://openalex.org/I4210156128"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020863121","display_name":"Ahmed El-Mahdy","orcid":"https://orcid.org/0000-0001-9736-1352"},"institutions":[{"id":"https://openalex.org/I84524832","display_name":"Alexandria University","ror":"https://ror.org/00mzz1w90","country_code":"EG","type":"education","lineage":["https://openalex.org/I84524832"]}],"countries":["EG"],"is_corresponding":false,"raw_author_name":"Ahmed El-Mahdy","raw_affiliation_strings":["Alexandria University, Alexandria, Egypt"],"affiliations":[{"raw_affiliation_string":"Alexandria University, Alexandria, Egypt","institution_ids":["https://openalex.org/I84524832"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044241169","display_name":"Hisham El\u2010Shishiny","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210152544","display_name":"IBM (Egypt)","ror":"https://ror.org/04zpt4h82","country_code":"EG","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210152544"]}],"countries":["EG","US"],"is_corresponding":false,"raw_author_name":"Hisham El-Shishiny","raw_affiliation_strings":["IBM Centre for Advanced Studies in Cairo, IBM WTC, El-Ahram, Giza, Egypt","IBM Centre for Advanced Studies in Cairo, IBM WTC, El-Ahram, Giza, Egypt#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM Centre for Advanced Studies in Cairo, IBM WTC, El-Ahram, Giza, Egypt","institution_ids":["https://openalex.org/I4210152544"]},{"raw_affiliation_string":"IBM Centre for Advanced Studies in Cairo, IBM WTC, El-Ahram, Giza, Egypt#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5087689650"],"corresponding_institution_ids":["https://openalex.org/I4210156128"],"apc_list":null,"apc_paid":null,"fwci":0.3454,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.6855783,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transpose","display_name":"Transpose","score":0.8742426633834839},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8323677778244019},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8137528896331787},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.6169419288635254},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6090137958526611},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5656725168228149},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.516598105430603},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.5018563270568848},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.47072163224220276},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.45656806230545044},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4305751919746399},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10234850645065308},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.091143399477005},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08828425407409668}],"concepts":[{"id":"https://openalex.org/C200106649","wikidata":"https://www.wikidata.org/wiki/Q223683","display_name":"Transpose","level":3,"score":0.8742426633834839},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8323677778244019},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8137528896331787},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.6169419288635254},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6090137958526611},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5656725168228149},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.516598105430603},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.5018563270568848},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.47072163224220276},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.45656806230545044},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4305751919746399},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10234850645065308},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.091143399477005},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08828425407409668},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1463768.1463781","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1463768.1463781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 1st international forum on Next-generation multicore/manycore technologies","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4000000059604645,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W223046992","https://openalex.org/W1499615395","https://openalex.org/W1539796472","https://openalex.org/W2079726719","https://openalex.org/W2099244661","https://openalex.org/W2198284965","https://openalex.org/W2267922254","https://openalex.org/W4249511997"],"related_works":["https://openalex.org/W1851212222","https://openalex.org/W1509422975","https://openalex.org/W2092795222","https://openalex.org/W2735130281","https://openalex.org/W1990309876","https://openalex.org/W79990711","https://openalex.org/W4295935130","https://openalex.org/W1480947737","https://openalex.org/W2953368509","https://openalex.org/W3010779417"],"abstract_inverted_index":{"3D":[0,27],"transpose":[1,28],"is":[2,79,99,135],"an":[3],"important":[4],"operation":[5],"in":[6,50],"many":[7],"large":[8,119],"scale":[9],"scientific":[10],"applications":[11],"such":[12,55],"as":[13,56],"seismic":[14],"and":[15,90,122,163],"medical":[16],"imaging.":[17],"This":[18],"paper":[19],"proposes":[20],"a":[21,92,147],"novel":[22],"algorithm":[23,31,65],"for":[24,112,118,128,140],"fast":[25,113],"in-place":[26],"operation.":[29],"The":[30,64,132],"exploits":[32],"Single":[33],"Instruction":[34],"Multiple":[35],"Data":[36],"(SIMD)":[37],"multicore":[38],"architecture":[39],"with":[40],"software":[41],"managed":[42],"memory":[43],"hierarchy.":[44],"Such":[45,105],"architectural":[46],"features":[47],"are":[48],"present":[49],"the":[51,57,84,108,124,152],"next":[52],"generation":[53],"processors,":[54],"Cell":[58,153],"Broadband":[59],"Engine":[60],"(Cell":[61],"BE)":[62],"processor.":[63],"performs":[66],"transposition":[67,78,98],"at":[68,73,87,91,123],"two":[69],"levels":[70],"of":[71,110,159],"granularity:":[72],"coarse":[74],"level,":[75,95],"where":[76,96],"logical":[77],"done":[80,100],"by":[81,101,116,138],"merely":[82],"transposing":[83,142],"address":[85],"map":[86],"each":[88],"access;":[89],"fine":[93],"grain":[94],"physical":[97],"actual":[102],"element":[103],"displacement/swapping.":[104],"mix":[106],"combines":[107],"benefits":[109],"allowing":[111,139],"on-chip":[114,160],"bandwidth":[115],"providing":[117],"transfer":[120,133],"sizes,":[121],"same":[125],"time":[126],"allows":[127],"fine-grain":[129],"SIMD":[130],"operations.":[131],"rate":[134],"further":[136],"enhanced":[137],"batch":[141],"spatially":[143],"joined":[144],"data":[145],"along":[146],"major":[148],"axis.":[149],"Results":[150],"on":[151],"BE":[154],"processor":[155],"show":[156],"substantial":[157],"utilisation":[158],"communication":[161],"bandwidth,":[162],"negligible":[164],"processing":[165],"time.":[166]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":3}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
