{"id":"https://openalex.org/W4306873623","doi":"https://doi.org/10.1145/3568956","title":"XEngine: Optimal Tensor Rematerialization for Neural Networks in Heterogeneous Environments","display_name":"XEngine: Optimal Tensor Rematerialization for Neural Networks in Heterogeneous Environments","publication_year":2022,"publication_date":"2022-10-20","ids":{"openalex":"https://openalex.org/W4306873623","doi":"https://doi.org/10.1145/3568956"},"language":"en","primary_location":{"id":"doi:10.1145/3568956","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568956","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568956","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3568956","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102901070","display_name":"Manuela Schuler","orcid":"https://orcid.org/0000-0001-8598-3410"},"institutions":[{"id":"https://openalex.org/I33256026","display_name":"German Research Centre for Artificial Intelligence","ror":"https://ror.org/01ayc5b57","country_code":"DE","type":"funder","lineage":["https://openalex.org/I33256026"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Manuela Schuler","raw_affiliation_strings":["Deutsches Forschungszentrum f\u00fcr K\u00fcnstliche Intelligenz (DFKI), Saarland Informatics Campus, Saarbr\u00fccken,  Germany"],"raw_orcid":"https://orcid.org/0000-0001-8598-3410","affiliations":[{"raw_affiliation_string":"Deutsches Forschungszentrum f\u00fcr K\u00fcnstliche Intelligenz (DFKI), Saarland Informatics Campus, Saarbr\u00fccken,  Germany","institution_ids":["https://openalex.org/I33256026"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024954324","display_name":"Richard Membarth","orcid":"https://orcid.org/0000-0002-9979-7579"},"institutions":[{"id":"https://openalex.org/I33256026","display_name":"German Research Centre for Artificial Intelligence","ror":"https://ror.org/01ayc5b57","country_code":"DE","type":"funder","lineage":["https://openalex.org/I33256026"]},{"id":"https://openalex.org/I4210106192","display_name":"Technische Hochschule Ingolstadt","ror":"https://ror.org/02bxzcy64","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210106192"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Richard Membarth","raw_affiliation_strings":["Technische Hochschule Ingolstadt, Research Institute AImotion Bavaria, Ingolstadt, Germany and Deutsches Forschungszentrum f\u00fcr K\u00fcnstliche Intelligenz (DFKI), Saarland Informatics Campus, Saarbr\u00fccken, Germany"],"raw_orcid":"https://orcid.org/0000-0002-9979-7579","affiliations":[{"raw_affiliation_string":"Technische Hochschule Ingolstadt, Research Institute AImotion Bavaria, Ingolstadt, Germany and Deutsches Forschungszentrum f\u00fcr K\u00fcnstliche Intelligenz (DFKI), Saarland Informatics Campus, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I4210106192","https://openalex.org/I33256026"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026514026","display_name":"Philipp Slusallek","orcid":null},"institutions":[{"id":"https://openalex.org/I33256026","display_name":"German Research Centre for Artificial Intelligence","ror":"https://ror.org/01ayc5b57","country_code":"DE","type":"funder","lineage":["https://openalex.org/I33256026"]},{"id":"https://openalex.org/I91712215","display_name":"Saarland University","ror":"https://ror.org/01jdpyv68","country_code":"DE","type":"education","lineage":["https://openalex.org/I91712215"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Philipp Slusallek","raw_affiliation_strings":["Deutsches Forschungszentrum f\u00fcr K\u00fcnstliche Intelligenz (DFKI), Saarland Informatics Campus, Saarbr\u00fccken, Germany and Saarland University, Saarland Informatics Campus, Saarbr\u00fccken, Germany"],"raw_orcid":"https://orcid.org/0000-0002-2189-2429","affiliations":[{"raw_affiliation_string":"Deutsches Forschungszentrum f\u00fcr K\u00fcnstliche Intelligenz (DFKI), Saarland Informatics Campus, Saarbr\u00fccken, Germany and Saarland University, Saarland Informatics Campus, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I91712215","https://openalex.org/I33256026"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102901070"],"corresponding_institution_ids":["https://openalex.org/I33256026"],"apc_list":null,"apc_paid":null,"fwci":0.4101,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.55155875,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"20","issue":"1","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8510445356369019},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.6551516056060791},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.597806453704834},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.583014190196991},{"id":"https://openalex.org/keywords/programmer","display_name":"Programmer","score":0.5031527876853943},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.4951240122318268},{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.4895671308040619},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.47892841696739197},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4710754454135895},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.435285359621048},{"id":"https://openalex.org/keywords/integer-programming","display_name":"Integer programming","score":0.4240674674510956},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3641819953918457},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3545847535133362},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3025408089160919},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.2568713128566742},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1931883692741394},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.12476450204849243},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09189364314079285}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8510445356369019},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.6551516056060791},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.597806453704834},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.583014190196991},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.5031527876853943},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.4951240122318268},{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.4895671308040619},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.47892841696739197},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4710754454135895},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.435285359621048},{"id":"https://openalex.org/C56086750","wikidata":"https://www.wikidata.org/wiki/Q6042592","display_name":"Integer programming","level":2,"score":0.4240674674510956},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3641819953918457},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3545847535133362},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3025408089160919},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2568713128566742},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1931883692741394},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.12476450204849243},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09189364314079285},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3568956","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568956","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568956","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2212.09290","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2212.09290","pdf_url":"https://arxiv.org/pdf/2212.09290","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:publikationen.sulb.uni-saarland.de:20.500.11880/34902","is_oa":true,"landing_page_url":"http://dx.doi.org/10.22028/D291-38739","pdf_url":null,"source":{"id":"https://openalex.org/S4377196499","display_name":"Publications of the UdS (Saarland University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I91712215","host_organization_name":"Saarland University","host_organization_lineage":["https://openalex.org/I91712215"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"doc-type:article"},{"id":"doi:10.22028/d291-38739","is_oa":true,"landing_page_url":"https://doi.org/10.22028/d291-38739","pdf_url":null,"source":{"id":"https://openalex.org/S7407052975","display_name":"Universit\u00e4t des Saarlandes","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3568956","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3568956","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3568956","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4306873623.pdf","grobid_xml":"https://content.openalex.org/works/W4306873623.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1901129140","https://openalex.org/W1967134278","https://openalex.org/W2020527870","https://openalex.org/W2031070741","https://openalex.org/W2194775991","https://openalex.org/W2338908902","https://openalex.org/W2343908699","https://openalex.org/W2560674852","https://openalex.org/W2737740651","https://openalex.org/W2746663015","https://openalex.org/W2747329762","https://openalex.org/W2851456136","https://openalex.org/W2947664806","https://openalex.org/W2970587001","https://openalex.org/W2974097864","https://openalex.org/W2990896553","https://openalex.org/W3012479151","https://openalex.org/W3012514909","https://openalex.org/W3037639655","https://openalex.org/W3100141945","https://openalex.org/W3174394143","https://openalex.org/W3204105967","https://openalex.org/W4224330420","https://openalex.org/W4247353671","https://openalex.org/W4282974849","https://openalex.org/W4285503890","https://openalex.org/W4286236421","https://openalex.org/W4287756266","https://openalex.org/W4288017660","https://openalex.org/W4293404878","https://openalex.org/W4295720085","https://openalex.org/W4297645139","https://openalex.org/W6703652217"],"related_works":["https://openalex.org/W2379153735","https://openalex.org/W2046172023","https://openalex.org/W2972896947","https://openalex.org/W2170146914","https://openalex.org/W2355105570","https://openalex.org/W2083974823","https://openalex.org/W2072751097","https://openalex.org/W2015567081","https://openalex.org/W4237177615","https://openalex.org/W2289257917"],"abstract_inverted_index":{"Memory":[0],"efficiency":[1],"is":[2,73,206],"crucial":[3],"in":[4,28,34,53,107,202],"training":[5],"deep":[6,157],"learning":[7,158],"networks":[8,134,159,219],"on":[9,160,183,209],"resource-restricted":[10],"devices.":[11,66],"During":[12],"backpropagation,":[13,35],"forward":[14,37],"tensors":[15,38],"are":[16,32,192],"used":[17],"to":[18,59,81,104,153,194,239],"calculate":[19],"gradients.":[20],"Despite":[21],"the":[22,68,82,88,129,136,198,204,240],"option":[23],"of":[24,62,70,90,116,139,156,222],"keeping":[25],"those":[26],"dependencies":[27],"memory":[29,109,137,232],"until":[30],"they":[31],"reused":[33],"some":[36],"can":[39],"be":[40],"discarded":[41],"and":[42,77,114,125,127,227],"recomputed":[43],"later":[44],"from":[45],"saved":[46],"tensors,":[47],"so-called":[48],"checkpoints":[49,72,113],".":[50],"This":[51],"allows,":[52],"particular,":[54],"for":[55,132,218],"resource-constrained":[56],"heterogeneous":[57,105,161],"environments":[58,110],"make":[60],"use":[61,221],"all":[63],"available":[64],"compute":[65],"Unfortunately,":[67],"definition":[69],"these":[71],"a":[74,79,148,174,184,210],"non-trivial":[75],"problem":[76],"poses":[78],"challenge":[80],"programmer\u2014improper":[83],"or":[84],"excessive":[85],"recomputations":[86,115],"negate":[87],"benefit":[89],"checkpointing.":[91],"In":[92],"this":[93],"article,":[94],"we":[95,146],"present":[96],"XEngine,":[97],"an":[98],"approach":[99,119,179],"that":[100,180,191],"schedules":[101,217],"network":[102,205],"operators":[103,155],"devices":[106],"low":[108],"by":[111],"determining":[112],"tensors.":[117],"Our":[118,187],"selects":[120],"suitable":[121],"resources":[122],"per":[123],"timestep":[124],"operator":[126],"optimizes":[128],"end-to-end":[130],"time":[131],"neural":[133],"taking":[135],"limitation":[138],"each":[140],"device":[141],"into":[142],"account.":[143],"For":[144],"this,":[145],"formulate":[147],"mixed-integer":[149,175],"quadratic":[150],"program":[151],"(MIQP)":[152],"schedule":[154,201],"systems.":[162],"We":[163,213],"compare":[164],"our":[165],"MIQP":[166],"solver":[167,188],"XEngine":[168],"against":[169],"Checkmate":[170,200],"[":[171],"12":[172],"],":[173],"linear":[176],"programming":[177],"(MILP)":[178],"solves":[181],"recomputation":[182],"single":[185,211],"device.":[186,212],"finds":[189],"solutions":[190],"up":[193],"22.5%":[195],"faster":[196],"than":[197],"fastest":[199],"which":[203],"computed":[207],"exclusively":[208,238],"also":[214],"find":[215],"valid":[216],"making":[220],"both":[223],"central":[224],"processing":[225,229,242],"units":[226,230],"graphics":[228,241],"if":[231],"limitations":[233],"do":[234],"not":[235],"allow":[236],"scheduling":[237],"unit.":[243]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
