{"id":"https://openalex.org/W4408886225","doi":"https://doi.org/10.1145/3712031.3712033","title":"ITTPD: In-place Tensor Transposition with Permutation Decomposition on GPUs","display_name":"ITTPD: In-place Tensor Transposition with Permutation Decomposition on GPUs","publication_year":2025,"publication_date":"2025-02-19","ids":{"openalex":"https://openalex.org/W4408886225","doi":"https://doi.org/10.1145/3712031.3712033"},"language":"en","primary_location":{"id":"doi:10.1145/3712031.3712033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712031.3712033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3712031.3712033?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3712031.3712033?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065950372","display_name":"Kai-Jung Cheng","orcid":"https://orcid.org/0009-0005-0620-6442"},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Kai-Jung Cheng","raw_affiliation_strings":["National Tsing Hua University, Hsinchu, Taiwan"],"raw_orcid":"https://orcid.org/0009-0005-0620-6442","affiliations":[{"raw_affiliation_string":"National Tsing Hua University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058589126","display_name":"Che\u2013Rung Lee","orcid":"https://orcid.org/0000-0003-3940-4478"},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Che-Rung Lee","raw_affiliation_strings":["National Tsing Hua University, Hsinchu, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-3940-4478","affiliations":[{"raw_affiliation_string":"National Tsing Hua University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5065950372"],"corresponding_institution_ids":["https://openalex.org/I25846049"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02456351,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"90","last_page":"98"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13664","display_name":"Genome Rearrangement Algorithms","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9559999704360962,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transposition","display_name":"Transposition (logic)","score":0.7302167415618896},{"id":"https://openalex.org/keywords/permutation","display_name":"Permutation (music)","score":0.615321934223175},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5977817177772522},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5041824579238892},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.4963863492012024},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.49213331937789917},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21472889184951782},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15785014629364014},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09814554452896118},{"id":"https://openalex.org/keywords/pure-mathematics","display_name":"Pure mathematics","score":0.08493533730506897},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.04818159341812134}],"concepts":[{"id":"https://openalex.org/C12455157","wikidata":"https://www.wikidata.org/wiki/Q7835331","display_name":"Transposition (logic)","level":2,"score":0.7302167415618896},{"id":"https://openalex.org/C21308566","wikidata":"https://www.wikidata.org/wiki/Q7169365","display_name":"Permutation (music)","level":2,"score":0.615321934223175},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5977817177772522},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5041824579238892},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.4963863492012024},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.49213331937789917},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21472889184951782},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15785014629364014},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09814554452896118},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.08493533730506897},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.04818159341812134},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712031.3712033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712031.3712033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3712031.3712033?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3712031.3712033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712031.3712033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3712031.3712033?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8564454847","display_name":null,"funder_award_id":"1132221E007140","funder_id":"https://openalex.org/F4320331164","funder_display_name":"National Science and Technology Council"}],"funders":[{"id":"https://openalex.org/F4320331164","display_name":"National Science and Technology Council","ror":"https://ror.org/00wnb9798"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4408886225.pdf","grobid_xml":"https://content.openalex.org/works/W4408886225.grobid-xml"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W1536222765","https://openalex.org/W1987882202","https://openalex.org/W2006826424","https://openalex.org/W2006973305","https://openalex.org/W2019463754","https://openalex.org/W2023749136","https://openalex.org/W2024165284","https://openalex.org/W2028122240","https://openalex.org/W2032473328","https://openalex.org/W2037271374","https://openalex.org/W2037365376","https://openalex.org/W2051325990","https://openalex.org/W2054600183","https://openalex.org/W2067425328","https://openalex.org/W2070449673","https://openalex.org/W2083746907","https://openalex.org/W2092795222","https://openalex.org/W2093135480","https://openalex.org/W2103333868","https://openalex.org/W2111309482","https://openalex.org/W2113055885","https://openalex.org/W2149381887","https://openalex.org/W2152240519","https://openalex.org/W2157622572","https://openalex.org/W2262548874","https://openalex.org/W2268122618","https://openalex.org/W2466677533","https://openalex.org/W2471966094","https://openalex.org/W2528701899","https://openalex.org/W2588061952","https://openalex.org/W2763373928","https://openalex.org/W2885972351","https://openalex.org/W2939295767","https://openalex.org/W2953907816","https://openalex.org/W2963137752","https://openalex.org/W2963482281","https://openalex.org/W2991072578","https://openalex.org/W3014060182","https://openalex.org/W3099221095","https://openalex.org/W3101321165","https://openalex.org/W3106450156","https://openalex.org/W3210690626","https://openalex.org/W4293075156","https://openalex.org/W4312544421","https://openalex.org/W4392995987"],"related_works":["https://openalex.org/W2073700517","https://openalex.org/W4288062113","https://openalex.org/W3151485003","https://openalex.org/W2356141215","https://openalex.org/W2361312893","https://openalex.org/W3215624820","https://openalex.org/W4292765704","https://openalex.org/W4207057333","https://openalex.org/W2077637320","https://openalex.org/W2085398123"],"abstract_inverted_index":{"Tensor":[0,56],"transposition":[1,86],"is":[2,46,122],"a":[3,61,170],"fundamental":[4],"operation":[5],"in":[6,22,27,32],"tensor":[7,21,104,110,175],"computations":[8],"with":[9,58,165],"broad":[10],"applications.Most":[11],"existing":[12,99],"algorithms":[13],"rely":[14],"on":[15],"out-of-place":[16,166],"transposition,":[17],"which":[18],"duplicates":[19],"the":[20,72,103,109,114,130],"memory":[23,34,69,115,126,148],"and":[24,88,117,150],"repositions":[25],"elements":[26],"their":[28],"transposed":[29],"order,":[30],"resulting":[31],"high":[33],"demands.On":[35],"memory-constrained":[36],"devices":[37],"such":[38],"as":[39,160,162],"Graphics":[40],"Processing":[41],"Units":[42],"(GPUs),":[43],"this":[44,50],"approach":[45],"often":[47],"impractical.To":[48],"address":[49],"challenge,":[51],"we":[52],"propose":[53],"ITTPD":[54,76,101,139,154],"(In-place":[55],"Transposition":[57],"Permutation":[59],"Decomposition),":[60],"GPU-based":[62],"method":[63],"designed":[64],"to":[65,71,98],"operate":[66],"within":[67,95],"strict":[68],"constraints.Similarly":[70],"previous":[73],"work":[74],"EITHOT,":[75],"utilizes":[77],"permutation":[78],"decomposition,":[79],"breaking":[80],"down":[81],"complex":[82],"permutations":[83],"into":[84,105],"simpler":[85],"primitives,":[87],"determining":[89],"an":[90],"execution":[91,131,152],"sequence":[92],"that":[93,138],"fits":[94],"available":[96],"memory.Comparing":[97],"work,":[100],"partitions":[102],"smaller":[106],"sections":[107],"for":[108,124,133,173],"size":[111],"larger":[112],"than":[113],"limits":[116],"transposes":[118],"each":[119,134],"independently.The":[120],"implementation":[121],"optimized":[123],"GPU":[125,144],"access":[127],"patterns,":[128],"improving":[129],"efficiency":[132],"primitive.Experimental":[135],"results":[136],"show":[137],"significantly":[140],"outperforms":[141],"state-of-the-art":[142],"out-ofplace":[143],"implementations,":[145],"offering":[146],"reduced":[147],"overhead":[149],"faster":[151],"times.Moreover,":[153],"can":[155],"process":[156],"tensors":[157],"nearly":[158],"twice":[159],"large":[161],"those":[163],"feasible":[164],"methods,":[167],"making":[168],"it":[169],"versatile":[171],"solution":[172],"N-order":[174],"transpositions.":[176]},"counts_by_year":[],"updated_date":"2026-03-10T14:07:55.174380","created_date":"2025-10-10T00:00:00"}
