{"id":"https://openalex.org/W7125950438","doi":"https://doi.org/10.1109/ase63991.2025.00147","title":"HybridSIMD: A Super C++ SIMD Library with Integrated Auto-tuning Capabilities","display_name":"HybridSIMD: A Super C++ SIMD Library with Integrated Auto-tuning Capabilities","publication_year":2025,"publication_date":"2025-11-16","ids":{"openalex":"https://openalex.org/W7125950438","doi":"https://doi.org/10.1109/ase63991.2025.00147"},"language":null,"primary_location":{"id":"doi:10.1109/ase63991.2025.00147","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ase63991.2025.00147","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 40th IEEE/ACM International Conference on Automated Software Engineering (ASE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101932106","display_name":"Haiyan Pan","orcid":"https://orcid.org/0000-0002-5464-1642"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haolin Pan","raw_affiliation_strings":["Institute of Software,Chinese Academy of Sciences,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Software,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124089281","display_name":"Xulin Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xulin Zhou","raw_affiliation_strings":["Institute of Software,Chinese Academy of Sciences,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Software,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124141240","display_name":"Mingjie Xing","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingjie Xing","raw_affiliation_strings":["Institute of Software,Chinese Academy of Sciences,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Software,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124053661","display_name":"Yanjun Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjun Wu","raw_affiliation_strings":["Institute of Software,Chinese Academy of Sciences,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Software,Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101932106"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210128818"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.78558582,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1756","last_page":"1767"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.758899986743927,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.758899986743927,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.07580000162124634,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.02979999966919422,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7738000154495239},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.6672999858856201},{"id":"https://openalex.org/keywords/fragmentation","display_name":"Fragmentation (computing)","score":0.5055000185966492},{"id":"https://openalex.org/keywords/digital-library","display_name":"Digital library","score":0.40310001373291016},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.3991999924182892},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.38760000467300415},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3714999854564667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8048999905586243},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7738000154495239},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.6672999858856201},{"id":"https://openalex.org/C191015642","wikidata":"https://www.wikidata.org/wiki/Q1132459","display_name":"Fragmentation (computing)","level":2,"score":0.5055000185966492},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.49390000104904175},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.40310001373291016},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.3991999924182892},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.38760000467300415},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3714999854564667},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.3596000075340271},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3010999858379364},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.28290000557899475},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.272599995136261},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.27000001072883606},{"id":"https://openalex.org/C44210515","wikidata":"https://www.wikidata.org/wiki/Q16968978","display_name":"Bespoke","level":2,"score":0.26980000734329224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ase63991.2025.00147","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ase63991.2025.00147","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 40th IEEE/ACM International Conference on Automated Software Engineering (ASE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1543313205","https://openalex.org/W1978362304","https://openalex.org/W1991009705","https://openalex.org/W2004183895","https://openalex.org/W2025437939","https://openalex.org/W2033829515","https://openalex.org/W2064358806","https://openalex.org/W2077791698","https://openalex.org/W2163000655","https://openalex.org/W2293666837","https://openalex.org/W2333659671","https://openalex.org/W2510443011","https://openalex.org/W2788030525","https://openalex.org/W2887312645","https://openalex.org/W2896878866","https://openalex.org/W2898845184","https://openalex.org/W2923950794","https://openalex.org/W3016220765","https://openalex.org/W3082412394","https://openalex.org/W3152824079","https://openalex.org/W3156604833","https://openalex.org/W4206059443","https://openalex.org/W4292869481","https://openalex.org/W4321496225","https://openalex.org/W4385574963","https://openalex.org/W4394871722","https://openalex.org/W4398234882"],"related_works":[],"abstract_inverted_index":{"Single":[0],"Instruction,":[1],"Multiple":[2],"Data":[3],"(SIMD)":[4],"technology":[5],"is":[6,160],"crucial":[7],"for":[8,42,56],"enhancing":[9],"computational":[10],"efficiency":[11],"in":[12,37,151],"High-Performance":[13],"Computing":[14],"(HPC).":[15],"While":[16],"C++":[17,64],"SIMD":[18,57],"libraries":[19],"abstract":[20],"away":[21],"low-level":[22],"complexities,":[23],"their":[24],"proliferation":[25],"has":[26],"led":[27],"to":[28,66,109],"a":[29,52,63,73,94],"fragmented":[30],"set":[31],"of":[32,86],"libraries,":[33],"creating":[34],"significant":[35],"challenges":[36],"both":[38],"performance":[39,112],"and":[40,76,126,144],"usability":[41],"developers.":[43],"To":[44],"overcome":[45],"these":[46],"library-level":[47],"limitations,":[48],"this":[49,68,106],"paper":[50],"introduces":[51],"new":[53],"collaborative":[54,79,107],"concept":[55],"library":[58,65],"design.":[59],"We":[60],"present":[61],"HybridSIMD,":[62],"embody":[67],"principle,":[69],"resolving":[70,152],"fragmentation":[71,153],"through":[72],"unified":[74],"interface":[75],"an":[77],"operator-level":[78],"back-end":[80],"that":[81],"leverages":[82],"the":[83,100,133],"collective":[84],"strengths":[85],"existing":[87],"libraries.":[88],"A":[89],"built-in":[90],"auto-tuning":[91],"engine,":[92],"featuring":[93],"hierarchical":[95],"search":[96],"strategy,":[97],"automatically":[98],"navigates":[99],"rich":[101],"optimization":[102],"space":[103],"created":[104],"by":[105],"approach":[108],"deliver":[110],"maximum":[111],"without":[113],"manual":[114],"intervention.":[115],"Experimental":[116],"results":[117],"across":[118],"six":[119],"real-world":[120],"HPC":[121],"benchmarks":[122],"on":[123,139,142,146],"AVX2,":[124,140],"AVX512,":[125,143],"NEON":[127],"architectures":[128],"demonstrate":[129],"HybridSIMD\u2019s":[130],"superiority.":[131],"Notably,":[132],"highest":[134],"speedups":[135],"achieved":[136],"are":[137],"185.34\u00d7":[138],"97.80\u00d7":[141],"71.32\u00d7":[145],"NEON,":[147],"showcasing":[148],"its":[149],"effectiveness":[150],"while":[154],"delivering":[155],"state-of-the-art":[156],"performance.":[157],"Our":[158],"artifact":[159],"available":[161],"at":[162],"https://github.com/Panhaolin2001/HybridSIMD.":[163]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2026-01-29T00:00:00"}
