{"id":"https://openalex.org/W4414198771","doi":"https://doi.org/10.1109/dac63849.2025.11133299","title":"NDFT: Accelerating Density Functional Theory Calculations via Hardware/Software Co-Design on Near-Data Computing System","display_name":"NDFT: Accelerating Density Functional Theory Calculations via Hardware/Software Co-Design on Near-Data Computing System","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414198771","doi":"https://doi.org/10.1109/dac63849.2025.11133299"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11133299","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133299","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087085854","display_name":"Qingcai Jiang","orcid":"https://orcid.org/0000-0002-9729-8821"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qingcai Jiang","raw_affiliation_strings":["University of Science and Technology of China,School of Computer Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Computer Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116358067","display_name":"Buxin Tu","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Buxin Tu","raw_affiliation_strings":["University of Science and Technology of China,School of Computer Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Computer Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036729249","display_name":"Xiaoyu Hao","orcid":"https://orcid.org/0000-0003-4115-9475"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyu Hao","raw_affiliation_strings":["University of Science and Technology of China,School of Computer Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Computer Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101436158","display_name":"Junshi Chen","orcid":"https://orcid.org/0000-0002-6487-3658"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junshi Chen","raw_affiliation_strings":["University of Science and Technology of China,School of Computer Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Computer Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085758579","display_name":"Hong An","orcid":"https://orcid.org/0000-0002-3900-3722"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong An","raw_affiliation_strings":["University of Science and Technology of China,School of Computer Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Computer Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5087085854"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26090101,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8349999785423279,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8349999785423279,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.49140000343322754},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4832000136375427},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4717999994754791},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4260999858379364},{"id":"https://openalex.org/keywords/density-functional-theory","display_name":"Density functional theory","score":0.41260001063346863},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.33880001306533813}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7505000233650208},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.49140000343322754},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4832000136375427},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4717999994754791},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C152365726","wikidata":"https://www.wikidata.org/wiki/Q1048589","display_name":"Density functional theory","level":2,"score":0.41260001063346863},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.358599990606308},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.301800012588501},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.29510000348091125},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28040000796318054},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C116672817","wikidata":"https://www.wikidata.org/wiki/Q1454986","display_name":"Physical system","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.26179999113082886},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.2558000087738037},{"id":"https://openalex.org/C52622258","wikidata":"https://www.wikidata.org/wiki/Q131222","display_name":"Information theory","level":2,"score":0.25540000200271606}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11133299","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133299","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1981220134","https://openalex.org/W1981943579","https://openalex.org/W2002555321","https://openalex.org/W2034861439","https://openalex.org/W2045431893","https://openalex.org/W2058398316","https://openalex.org/W2085842264","https://openalex.org/W2099789148","https://openalex.org/W2109574129","https://openalex.org/W2149590159","https://openalex.org/W2789554134","https://openalex.org/W2790310590","https://openalex.org/W2985229340","https://openalex.org/W3042598257","https://openalex.org/W3093552440","https://openalex.org/W3111721177","https://openalex.org/W3157531038","https://openalex.org/W3158911983","https://openalex.org/W4245923077","https://openalex.org/W4246166885","https://openalex.org/W4249322926","https://openalex.org/W4280604716","https://openalex.org/W4281685651","https://openalex.org/W4281731825","https://openalex.org/W4312267400","https://openalex.org/W4322724459","https://openalex.org/W4327930465","https://openalex.org/W4396639501","https://openalex.org/W4401568638"],"related_works":[],"abstract_inverted_index":{"Linear-response":[0],"time-dependent":[1],"Density":[2,97],"Functional":[3,98],"Theory":[4,99],"(LR-TDDFT)":[5],"is":[6,45],"a":[7,39,95,104,124,130,134,166],"widely":[8],"used":[9],"method":[10],"for":[11],"accurately":[12],"predicting":[13],"the":[14,35,46,54,57,74,80,83,118,144],"excited-state":[15],"properties":[16],"of":[17,42,56,77,115,133,155],"physical":[18,168],"systems.":[19],"Previous":[20],"works":[21,69],"have":[22],"attempted":[23],"to":[24,111,117,139],"accelerate":[25],"LR-TDDFT":[26,116,138],"using":[27],"heterogeneous":[28,58],"systems":[29],"such":[30],"as":[31],"GPUs,":[32],"FPGAs,":[33],"and":[34,53,108,157,161],"Sunway":[36],"architecture.":[37],"However,":[38],"major":[40],"drawback":[41],"these":[43,68,90],"approaches":[44],"constant":[47],"data":[48,64],"movement":[49,65],"between":[50],"host":[51],"memory":[52,55],"systems,":[59],"which":[60],"results":[61,148],"in":[62,137],"substantial":[63],"overhead.":[66],"Moreover,":[67],"focus":[70],"primarily":[71],"on":[72,143,165],"optimizing":[73],"compute-intensive":[75],"portions":[76],"LR-TDDFT,":[78],"despite":[79],"fact":[81],"that":[82,150],"calculation":[84],"steps":[85],"are":[86],"fundamentally":[87],"memory-bound.To":[88],"address":[89],"challenges,":[91],"we":[92,102,128],"propose":[93],"NDFT,":[94],"Near-Data":[96],"framework.":[100],"Specifically,":[101],"design":[103],"novel":[105],"task":[106],"partitioning":[107],"scheduling":[109],"mechanism":[110],"offload":[112],"each":[113],"part":[114],"most":[119],"suitable":[120],"computing":[121],"units":[122],"within":[123],"CPU-NDP":[125,145],"system.":[126,146,169],"Additionally,":[127],"implement":[129],"hardware/software":[131],"co-optimization":[132],"critical":[135],"kernel":[136],"further":[140],"enhance":[141],"performance":[142,153],"Our":[147],"show":[149],"NDFT":[151],"achieves":[152],"improvements":[154],"5.2x":[156],"2.5x":[158],"over":[159],"CPU":[160],"GPU":[162],"baselines,":[163],"respectively,":[164],"large":[167]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
