{"id":"https://openalex.org/W4416004152","doi":"https://doi.org/10.1145/3731599.3767703","title":"A GPU FFT Wrapper to Co-optimize Floating-Point Precision and Library Selection via Predictive Error Modeling","display_name":"A GPU FFT Wrapper to Co-optimize Floating-Point Precision and Library Selection via Predictive Error Modeling","publication_year":2025,"publication_date":"2025-11-07","ids":{"openalex":"https://openalex.org/W4416004152","doi":"https://doi.org/10.1145/3731599.3767703"},"language":null,"primary_location":{"id":"doi:10.1145/3731599.3767703","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767703","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3731599.3767703","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120297566","display_name":"Julius Lehner","orcid":"https://orcid.org/0009-0002-9032-107X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Julius Lehner","raw_affiliation_strings":["Technical University of Munich, Munich, Germany"],"raw_orcid":"https://orcid.org/0009-0002-9032-107X","affiliations":[{"raw_affiliation_string":"Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087319091","display_name":"Eishi Arima","orcid":"https://orcid.org/0009-0002-7043-4288"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Eishi Arima","raw_affiliation_strings":["Technical University of Munich, Garching, Germany"],"raw_orcid":"https://orcid.org/0009-0002-7043-4288","affiliations":[{"raw_affiliation_string":"Technical University of Munich, Garching, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045289712","display_name":"Martin Schulz","orcid":"https://orcid.org/0000-0001-9013-435X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Schulz","raw_affiliation_strings":["Technical University of Munich, Garching, Germany"],"raw_orcid":"https://orcid.org/0000-0001-9013-435X","affiliations":[{"raw_affiliation_string":"Technical University of Munich, Garching, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5120297566"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35197943,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1534","last_page":"1543"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7602999806404114,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7602999806404114,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.07689999788999557,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.04050000011920929,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.7221999764442444},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6704000234603882},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5996000170707703},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5924999713897705},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5328999757766724},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.5195000171661377},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.49470001459121704},{"id":"https://openalex.org/keywords/on-the-fly","display_name":"On the fly","score":0.4284000098705292}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8338000178337097},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.7221999764442444},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6704000234603882},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5996000170707703},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5924999713897705},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5328999757766724},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.5195000171661377},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.49470001459121704},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4438999891281128},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.4284000098705292},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4237000048160553},{"id":"https://openalex.org/C57733114","wikidata":"https://www.wikidata.org/wiki/Q1006032","display_name":"Discrete Fourier transform (general)","level":5,"score":0.3765000104904175},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3628999888896942},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.350600004196167},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3400999903678894},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3382999897003174},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3147999942302704},{"id":"https://openalex.org/C2775973920","wikidata":"https://www.wikidata.org/wiki/Q3252726","display_name":"Selection algorithm","level":3,"score":0.31459999084472656},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.30630001425743103},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2687000036239624},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25859999656677246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731599.3767703","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767703","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3731599.3767703","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767703","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1501755821","https://openalex.org/W2009516879","https://openalex.org/W2020217519","https://openalex.org/W2061171222","https://openalex.org/W2078095679","https://openalex.org/W2081368694","https://openalex.org/W2098834788","https://openalex.org/W2102182691","https://openalex.org/W2114927422","https://openalex.org/W2171473263","https://openalex.org/W2207050309","https://openalex.org/W2244005500","https://openalex.org/W2265166184","https://openalex.org/W2485003951","https://openalex.org/W2743322459","https://openalex.org/W2774309575","https://openalex.org/W2791673912","https://openalex.org/W2808739938","https://openalex.org/W2838756705","https://openalex.org/W2902494873","https://openalex.org/W2971542961","https://openalex.org/W2984602355","https://openalex.org/W3000483857","https://openalex.org/W3036005033","https://openalex.org/W3094872547","https://openalex.org/W3113606433","https://openalex.org/W3159918017","https://openalex.org/W3207730444","https://openalex.org/W3217045543","https://openalex.org/W4318686086","https://openalex.org/W4319069056","https://openalex.org/W4393931571","https://openalex.org/W4406799067","https://openalex.org/W4412048332","https://openalex.org/W4412549240","https://openalex.org/W4413458368"],"related_works":[],"abstract_inverted_index":{"Approximate":[0],"and":[1,9,21,39,66,75,92,128],"low-precision":[2],"computing":[3],"are":[4],"essential":[5],"for":[6],"modern":[7],"applications,":[8],"effectively":[10],"leveraging":[11],"available":[12],"precision":[13,137],"options":[14],"can":[15],"deliver":[16],"substantial":[17],"gains":[18],"in":[19,36,59],"performance":[20],"energy":[22,101],"efficiency.":[23],"We":[24],"focus":[25],"on":[26,72,94],"the":[27,63,88,95],"Fast":[28],"Fourier":[29],"Transform":[30],"(FFT),":[31],"a":[32,41,84,104,135,147],"representative":[33],"function":[34],"used":[35],"scientific":[37],"computing,":[38],"propose":[40,83],"wrapper":[42,148],"library":[43],"to":[44,97,134],"exploit":[45],"these":[46,68,80],"options.":[47],"Using":[48],"multiple":[49],"GPU-accelerated":[50],"FFT":[51],"libraries,":[52],"we":[53,82],"observe":[54],"that":[55,67,86],"different":[56,60],"libraries":[57],"excel":[58],"regions":[61],"of":[62],"performance\u2013accuracy":[64],"space":[65],"sweet":[69],"spots":[70],"depend":[71],"transform":[73],"size":[74],"input":[76,120],"content.":[77],"Guided":[78],"by":[79],"insights,":[81],"framework":[85,140],"selects":[87],"best":[89],"kernel":[90],"(library":[91],"precision)":[93],"fly":[96],"minimize":[98],"runtime":[99,117],"or":[100],"while":[102],"satisfying":[103],"specified":[105],"error":[106,115],"threshold.":[107],"A":[108],"lightweight":[109],"machine":[110],"learning":[111],"model":[112],"predicts":[113],"per-kernel":[114],"at":[116],"from":[118],"sampled":[119],"features.":[121],"Experiments":[122],"show":[123],"over":[124],"98%":[125],"selection":[126],"accuracy":[127],"mean":[129],"speedups":[130],"exceeding":[131],"40%":[132],"compared":[133],"double":[136],"baseline.":[138],"The":[139],"integrates":[141],"seamlessly":[142],"with":[143],"existing":[144],"workflows":[145],"as":[146],"library.":[149]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-07T00:00:00"}
