{"id":"https://openalex.org/W4405516731","doi":"https://doi.org/10.1007/s42514-024-00201-2","title":"Fine-grained vectorized merge sorting on RISC-V: from register to cache","display_name":"Fine-grained vectorized merge sorting on RISC-V: from register to cache","publication_year":2024,"publication_date":"2024-12-18","ids":{"openalex":"https://openalex.org/W4405516731","doi":"https://doi.org/10.1007/s42514-024-00201-2"},"language":"en","primary_location":{"id":"doi:10.1007/s42514-024-00201-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42514-024-00201-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42514-024-00201-2.pdf","source":{"id":"https://openalex.org/S4210190911","display_name":"CCF Transactions on High Performance Computing","issn_l":"2524-4922","issn":["2524-4922","2524-4930"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"CCF Transactions on High Performance Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s42514-024-00201-2.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031388726","display_name":"Jin Zhang","orcid":"https://orcid.org/0000-0003-0197-3624"},"institutions":[{"id":"https://openalex.org/I56934997","display_name":"Changsha University of Science and Technology","ror":"https://ror.org/03yph8055","country_code":"CN","type":"education","lineage":["https://openalex.org/I56934997"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jin Zhang","raw_affiliation_strings":["School of Computer and Communication Engineering, Changsha University of Science and Technology, Changsha, 410114, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, Changsha University of Science and Technology, Changsha, 410114, China","institution_ids":["https://openalex.org/I56934997"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101495148","display_name":"Jincheng Zhou","orcid":"https://orcid.org/0000-0003-2641-2495"},"institutions":[{"id":"https://openalex.org/I56934997","display_name":"Changsha University of Science and Technology","ror":"https://ror.org/03yph8055","country_code":"CN","type":"education","lineage":["https://openalex.org/I56934997"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jincheng Zhou","raw_affiliation_strings":["School of Computer and Communication Engineering, Changsha University of Science and Technology, Changsha, 410114, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, Changsha University of Science and Technology, Changsha, 410114, China","institution_ids":["https://openalex.org/I56934997"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100368854","display_name":"Xiang Zhang","orcid":"https://orcid.org/0000-0002-3272-894X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Zhang","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, 410073, China","Laboratory of Digitizing Software for Frontier Equipment, National University of Defense Technology, Changsha, 410073, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Laboratory of Digitizing Software for Frontier Equipment, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087147492","display_name":"Di Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Ma","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, 410073, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058447912","display_name":"Chunye Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I4210165734","display_name":"National Supercomputing Center of Tianjin","ror":"https://ror.org/05tngxm14","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210165734"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunye Gong","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, 410073, China","Laboratory of Digitizing Software for Frontier Equipment, National University of Defense Technology, Changsha, 410073, China","National Supercomputer Center in Tianjin, Tianjin, 300457, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Laboratory of Digitizing Software for Frontier Equipment, National University of Defense Technology, Changsha, 410073, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National Supercomputer Center in Tianjin, Tianjin, 300457, China","institution_ids":["https://openalex.org/I4210165734"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5031388726"],"corresponding_institution_ids":["https://openalex.org/I56934997"],"apc_list":{"value":2190,"currency":"EUR","value_usd":2790},"apc_paid":{"value":2190,"currency":"EUR","value_usd":2790},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.24381142,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"1","first_page":"58","last_page":"71"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7384248971939087},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6859687566757202},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.594534695148468},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.4976840317249298},{"id":"https://openalex.org/keywords/register","display_name":"Register (sociolinguistics)","score":0.4359988570213318}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7384248971939087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6859687566757202},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.594534695148468},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.4976840317249298},{"id":"https://openalex.org/C2779235478","wikidata":"https://www.wikidata.org/wiki/Q286576","display_name":"Register (sociolinguistics)","level":2,"score":0.4359988570213318},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s42514-024-00201-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42514-024-00201-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42514-024-00201-2.pdf","source":{"id":"https://openalex.org/S4210190911","display_name":"CCF Transactions on High Performance Computing","issn_l":"2524-4922","issn":["2524-4922","2524-4930"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"CCF Transactions on High Performance Computing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s42514-024-00201-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42514-024-00201-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42514-024-00201-2.pdf","source":{"id":"https://openalex.org/S4210190911","display_name":"CCF Transactions on High Performance Computing","issn_l":"2524-4922","issn":["2524-4922","2524-4930"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"CCF Transactions on High Performance Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G37568934","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5594719277","display_name":null,"funder_award_id":"42104078","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5832729736","display_name":null,"funder_award_id":"62032023","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7239862944","display_name":null,"funder_award_id":"6190241","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4405516731.pdf","grobid_xml":"https://content.openalex.org/works/W4405516731.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W2077333890","https://openalex.org/W2082695854","https://openalex.org/W2085248745","https://openalex.org/W2127766448","https://openalex.org/W2141389982","https://openalex.org/W2166955231","https://openalex.org/W2278783412","https://openalex.org/W2406955896","https://openalex.org/W2977423784","https://openalex.org/W3141434431","https://openalex.org/W4210361997","https://openalex.org/W4391045497"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4312941541","https://openalex.org/W2361019053","https://openalex.org/W2071125430","https://openalex.org/W624979004","https://openalex.org/W4238987864","https://openalex.org/W4232669059","https://openalex.org/W2064119831"],"abstract_inverted_index":{"Abstract":[0],"Merge":[1],"sort":[2,41,61,73],"as":[3,30,126,155],"a":[4,31,68],"divide-sort-merge":[5,56],"paradigm":[6],"has":[7,175],"been":[8],"widely":[9],"applied":[10],"in":[11,86,103,162,286],"computer":[12],"science":[13],"fields.":[14],"As":[15],"modern":[16],"reduced":[17],"instruction":[18,37,83],"set":[19],"computing":[20],"architectures":[21],"like":[22],"the":[23,55,63,76,79,95,104,112,127,134,152,163,171,180,194,199,207,223,243,276,283,287],"fifth":[24],"generation":[25],"(RISC-V)":[26],"regard":[27],"multiple":[28],"registers":[29],"vector":[32],"register":[33,124,254],"group":[34],"for":[35,145],"wide":[36],"parallelism,":[38],"optimizing":[39],"merge":[40,72,157,161,166,173,204,211],"with":[42,142],"this":[43,51,190,219],"vectorized":[44,71,81,96,140],"property":[45],"is":[46,84,92,232,279],"becoming":[47],"increasingly":[48],"common.":[49],"In":[50],"paper,":[52],"we":[53,192],"overhaul":[54],"paradigm,":[57],"from":[58,111],"its":[59],"register-level":[60,77],"to":[62,66,122,132,197,205,218,221,234,236],"cache-aware":[64,156],"merge,":[65],"develop":[67],"fine-grained":[69,250],"RISC-V":[70,244],"(RVMS).":[74],"From":[75],"view,":[78],"inline":[80],"transpose":[82,135],"missed":[85],"RISC-V,":[87],"so":[88],"implementing":[89],"it":[90],"efficiently":[91],"non-trivial.":[93],"Besides,":[94],"comparisons":[97,141],"do":[98],"not":[99],"always":[100],"work":[101],"well":[102],"merging":[105,182,230,258,264],"networks.":[106],"Both":[107],"issues":[108],"primarily":[109],"stem":[110],"expensive":[113],"data":[114,125,130,160,225],"shuffle":[115,131],"instruction.":[116],"To":[117,189],"bypass":[118],"it,":[119],"RVMS":[120],"strides":[121],"take":[123],"proxy":[128],"of":[129,202,209],"accelerate":[133],"operation,":[136],"and":[137,213,262,272,291],"meanwhile":[138,214],"replaces":[139],"scalar":[143],"cousin":[144],"more":[146],"light":[147],"real":[148],"value":[149],"swap.":[150],"On":[151],"other":[153],"hand,":[154],"makes":[158],"larger":[159],"cache,":[164],"most":[165],"schemes":[167,252],"have":[168],"two":[169,237],"drawbacks:":[170],"in-cache":[172],"usually":[174],"low":[176],"cache":[177],"utilization,":[178],"while":[179],"out-of-cache":[181],"network":[183,231],"remains":[184],"an":[185,228],"ineffectively":[186],"symmetric":[187],"structure.":[188],"end,":[191],"propose":[193],"half-merge":[195,260],"scheme":[196],"employ":[198],"auxiliary":[200],"space":[201,220],"in-place":[203],"halve":[206],"footprint":[208],"na\u00efve":[210],"sort,":[212],"copy":[215],"one":[216],"sequence":[217],"avoid":[222],"former":[224],"exchange.":[226],"Furthermore,":[227],"asymmetric":[229,263],"developed":[233],"adapt":[235],"different":[238],"input":[239],"sizes.":[240],"Experiments":[241],"on":[242],"processor":[245],"SG2042":[246],"show":[247],"that":[248],"four":[249],"optimization":[251],"including":[253],"strided":[255],"transpose,":[256],"hybrid":[257],"network,":[259,265],"strategy,":[261],"improve":[266],"performance":[267,278],"by":[268],"4.05%,":[269],"19.88%,":[270],"12.23%,":[271],"11.04%":[273],"respectively.":[274],"Importantly,":[275],"overall":[277],"1.34x":[280],"faster":[281,293],"than":[282,294],"parallel":[284],"sorting":[285],"Boost":[288],"C++":[289],"library,":[290],"1.85x":[292],"std::sort.":[295]},"counts_by_year":[],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
