{"id":"https://openalex.org/W4394871726","doi":"https://doi.org/10.1145/3617232.3624858","title":"Optimizing Deep Learning Inference via Global Analysis and Tensor Expressions","display_name":"Optimizing Deep Learning Inference via Global Analysis and Tensor Expressions","publication_year":2024,"publication_date":"2024-04-17","ids":{"openalex":"https://openalex.org/W4394871726","doi":"https://doi.org/10.1145/3617232.3624858"},"language":"en","primary_location":{"id":"doi:10.1145/3617232.3624858","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617232.3624858","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617232.3624858","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3617232.3624858","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5095381671","display_name":"Chunwei Xia","orcid":"https://orcid.org/0000-0003-2014-5453"},"institutions":[{"id":"https://openalex.org/I130828816","display_name":"University of Leeds","ror":"https://ror.org/024mrxd33","country_code":"GB","type":"education","lineage":["https://openalex.org/I130828816"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN","GB"],"is_corresponding":true,"raw_author_name":"Chunwei Xia","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","School of Computing, University of Leeds, Leeds, United Kingdom","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China School of Computing, University of Leeds, Leeds, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"School of Computing, University of Leeds, Leeds, United Kingdom","institution_ids":["https://openalex.org/I130828816"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China School of Computing, University of Leeds, Leeds, United Kingdom","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I130828816","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017298602","display_name":"Jiacheng Zhao","orcid":"https://orcid.org/0000-0001-5228-8972"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiacheng Zhao","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022330556","display_name":"Q.X. Sun","orcid":"https://orcid.org/0009-0009-8939-7721"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qianqi Sun","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100401045","display_name":"Zheng Wang","orcid":"https://orcid.org/0000-0001-6157-0662"},"institutions":[{"id":"https://openalex.org/I130828816","display_name":"University of Leeds","ror":"https://ror.org/024mrxd33","country_code":"GB","type":"education","lineage":["https://openalex.org/I130828816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["School of Computing, University of Leeds, Leeds, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Leeds, Leeds, United Kingdom","institution_ids":["https://openalex.org/I130828816"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048534698","display_name":"Yuan Wen","orcid":"https://orcid.org/0000-0002-6747-947X"},"institutions":[{"id":"https://openalex.org/I195460627","display_name":"University of Aberdeen","ror":"https://ror.org/016476m91","country_code":"GB","type":"education","lineage":["https://openalex.org/I195460627"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yuan Wen","raw_affiliation_strings":["University of Aberdeen, Aberdeen, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Aberdeen, Aberdeen, United Kingdom","institution_ids":["https://openalex.org/I195460627"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081412795","display_name":"Teng Yu","orcid":"https://orcid.org/0000-0003-4391-8295"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Teng Yu","raw_affiliation_strings":["Thewake Systems Ltd, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Thewake Systems Ltd, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053070701","display_name":"Xiaobing Feng","orcid":"https://orcid.org/0000-0003-2909-7750"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobing Feng","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","Zhongguancun Laboratory, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China Zhongguancun Laboratory, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Zhongguancun Laboratory, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China Zhongguancun Laboratory, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086633294","display_name":"Huimin Cui","orcid":"https://orcid.org/0000-0002-2491-7679"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huimin Cui","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China School of Computer Science and Technology, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5095381671"],"corresponding_institution_ids":["https://openalex.org/I130828816","https://openalex.org/I19820366","https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.4463,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.89871455,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"286","last_page":"301"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8366470336914062},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.6856290698051453},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6373085975646973},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5044885873794556},{"id":"https://openalex.org/keywords/subroutine","display_name":"Subroutine","score":0.5022115707397461},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.48500964045524597},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4607487916946411},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.45885494351387024},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4330926537513733},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4320893883705139},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41381022334098816},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.41199707984924316},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3593160808086395},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.359284907579422}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8366470336914062},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.6856290698051453},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6373085975646973},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5044885873794556},{"id":"https://openalex.org/C96147967","wikidata":"https://www.wikidata.org/wiki/Q190686","display_name":"Subroutine","level":2,"score":0.5022115707397461},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.48500964045524597},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4607487916946411},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.45885494351387024},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4330926537513733},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4320893883705139},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41381022334098816},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.41199707984924316},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3593160808086395},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.359284907579422},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3617232.3624858","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617232.3624858","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617232.3624858","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.whiterose.ac.uk:203681","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.whiterose.ac.uk/203681/8/Optimizing%20Deep%20Learning%20Inference%20via%20Global%20Analysis%20and%20Tensor%20Expressions.pdf","source":{"id":"https://openalex.org/S4306400854","display_name":"White Rose Research Online (University of Leeds, The University of Sheffield, University of York)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800616092","host_organization_name":"White Rose University Consortium","host_organization_lineage":["https://openalex.org/I2800616092"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Proceedings Paper"},{"id":"pmh:oai:aura.abdn.ac.uk:2164/23949","is_oa":true,"landing_page_url":"https://eprints.whiterose.ac.uk/203681/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400966","display_name":"Aberdeen University Research Archive (Aberdeen University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I195460627","host_organization_name":"University of Aberdeen","host_organization_lineage":["https://openalex.org/I195460627"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference item"}],"best_oa_location":{"id":"doi:10.1145/3617232.3624858","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617232.3624858","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617232.3624858","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1289081580","display_name":null,"funder_award_id":"22003073","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1934935867","display_name":null,"funder_award_id":"Engineering and Physical Sciences R","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G249161853","display_name":null,"funder_award_id":"T2222026","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2674912097","display_name":null,"funder_award_id":"6209002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3496866952","display_name":null,"funder_award_id":"T222202","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4120025490","display_name":"Modernise Compiler Technology With Deep Learning","funder_award_id":"EP/X018202/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4248049630","display_name":null,"funder_award_id":"2200307","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4478266629","display_name":null,"funder_award_id":"62090024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G68136634","display_name":null,"funder_award_id":"2090024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8136545603","display_name":null,"funder_award_id":"62232015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8513333314","display_name":null,"funder_award_id":"2021ZD01","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8744364856","display_name":null,"funder_award_id":"110101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334978","display_name":"Beijing Nova Program","ror":"https://ror.org/034k14f91"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394871726.pdf","grobid_xml":"https://content.openalex.org/works/W4394871726.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W1992558790","https://openalex.org/W2015752165","https://openalex.org/W2023415862","https://openalex.org/W2034761517","https://openalex.org/W2051305716","https://openalex.org/W2064675550","https://openalex.org/W2318529993","https://openalex.org/W2471164860","https://openalex.org/W2549139847","https://openalex.org/W2606722458","https://openalex.org/W2767274246","https://openalex.org/W2790808809","https://openalex.org/W2791175987","https://openalex.org/W2794670651","https://openalex.org/W2799269451","https://openalex.org/W2809290718","https://openalex.org/W2890044493","https://openalex.org/W2914304175","https://openalex.org/W2922008702","https://openalex.org/W2949967139","https://openalex.org/W2962724414","https://openalex.org/W2963821229","https://openalex.org/W2970281776","https://openalex.org/W2981758446","https://openalex.org/W3119880013","https://openalex.org/W3148573243","https://openalex.org/W3166948093","https://openalex.org/W3173358825","https://openalex.org/W3174529902","https://openalex.org/W4212986322","https://openalex.org/W4220727415","https://openalex.org/W4285167314","https://openalex.org/W4318541538","https://openalex.org/W4321496239"],"related_works":["https://openalex.org/W2293118914","https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W2058965144","https://openalex.org/W2171015181","https://openalex.org/W3167919718","https://openalex.org/W4251718783","https://openalex.org/W4239447582","https://openalex.org/W2187181201","https://openalex.org/W2778498407"],"abstract_inverted_index":{"Optimizing":[0],"deep":[1],"neural":[2],"network":[3],"(DNN)":[4],"execution":[5],"is":[6],"important":[7],"but":[8],"becomes":[9],"increasingly":[10],"difficult":[11],"as":[12],"DNN":[13,17,43,106,122],"complexity":[14],"grows.":[15],"Existing":[16],"compilers":[18],"cannot":[19],"effectively":[20],"exploit":[21],"optimization":[22,84],"opportunities":[23],"across":[24,45],"operator":[25,46],"boundaries,":[26],"leaving":[27],"room":[28],"for":[29],"improvement.":[30],"To":[31],"address":[32],"this":[33],"challenge,":[34],"we":[35],"present":[36],"Souffle,":[37],"an":[38,89,109],"open-source":[39],"compiler":[40],"that":[41,116],"optimizes":[42],"inference":[44],"boundaries.":[47],"Souffle":[48,81,102,117],"creates":[49],"a":[50,79,126],"global":[51],"tensor":[52,56,62],"dependency":[53],"graph":[54,68],"using":[55,103],"expressions,":[57],"traces":[58],"data":[59,98],"flow":[60],"and":[61,64,75,93,97,136],"information,":[63],"partitions":[65],"the":[66],"computation":[67],"into":[69],"subprograms":[70],"based":[71],"on":[72,108],"dataflow":[73],"analysis":[74],"resource":[76],"constraints.":[77],"Within":[78],"subprogram,":[80],"performs":[82],"local":[83],"via":[85],"semantic-preserving":[86],"transformations,":[87],"finds":[88],"optimized":[90],"program":[91],"schedule,":[92],"improves":[94],"instruction-level":[95],"parallelism":[96],"reuse.":[99],"We":[100],"evaluated":[101],"six":[104,120],"representative":[105],"models":[107],"NVIDIA":[110],"A100":[111],"GPU.":[112],"Experimental":[113],"results":[114],"show":[115],"consistently":[118],"outperforms":[119],"state-of-the-art":[121],"optimizers":[123],"by":[124],"delivering":[125],"geometric":[127],"mean":[128],"speedup":[129],"of":[130],"up":[131],"to":[132],"3.7\u00d7":[133],"over":[134,138],"TensorRT":[135],"7.8\u00d7":[137],"Tensorflow":[139],"XLA.":[140]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
