{"id":"https://openalex.org/W4327930552","doi":"https://doi.org/10.1145/3582016.3582017","title":"DefT: Boosting Scalability of Deformable Convolution Operations on GPUs","display_name":"DefT: Boosting Scalability of Deformable Convolution Operations on GPUs","publication_year":2023,"publication_date":"2023-03-20","ids":{"openalex":"https://openalex.org/W4327930552","doi":"https://doi.org/10.1145/3582016.3582017"},"language":"en","primary_location":{"id":"doi:10.1145/3582016.3582017","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3582016.3582017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074284629","display_name":"Edward Hanson","orcid":"https://orcid.org/0000-0001-5179-8401"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward Hanson","raw_affiliation_strings":["Duke University, Durham, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Duke University, Durham, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014558474","display_name":"Mark Horton","orcid":"https://orcid.org/0000-0003-2267-8026"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Horton","raw_affiliation_strings":["Duke University, Durham, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Duke University, Durham, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429403","display_name":"Hai Li","orcid":"https://orcid.org/0000-0003-3228-6544"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai (Helen) Li","raw_affiliation_strings":["Duke University, Durham, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Duke University, Durham, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058073627","display_name":"Yiran Chen","orcid":"https://orcid.org/0000-0002-1486-8412"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiran Chen","raw_affiliation_strings":["Duke University, Durham, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Duke University, Durham, USA","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I170897317"],"apc_list":null,"apc_paid":null,"fwci":0.1111,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.35400086,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"134","last_page":"146"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8046484589576721},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.7249712347984314},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5965832471847534},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.570353090763092},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5120382905006409},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.5104809999465942},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.4543123245239258},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38749271631240845},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3669024705886841},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3201185166835785},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.31989628076553345},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.21361610293388367},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.17824909090995789}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8046484589576721},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.7249712347984314},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5965832471847534},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.570353090763092},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5120382905006409},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.5104809999465942},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.4543123245239258},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38749271631240845},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3669024705886841},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3201185166835785},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.31989628076553345},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.21361610293388367},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.17824909090995789}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3582016.3582017","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3582016.3582017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2830078906","display_name":null,"funder_award_id":"W911NF-19-2-0107","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1987588924","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2289252105","https://openalex.org/W2601564443","https://openalex.org/W2782278879","https://openalex.org/W2935331687","https://openalex.org/W2945146780","https://openalex.org/W2946429553","https://openalex.org/W2952122856","https://openalex.org/W2953212265","https://openalex.org/W2954996726","https://openalex.org/W2963150697","https://openalex.org/W2963598138","https://openalex.org/W2966926453","https://openalex.org/W2980200167","https://openalex.org/W2990138404","https://openalex.org/W3016719260","https://openalex.org/W3132460984","https://openalex.org/W3138516171","https://openalex.org/W3157609068","https://openalex.org/W3174208896","https://openalex.org/W3174529902","https://openalex.org/W3179964091","https://openalex.org/W3206290655","https://openalex.org/W4200566604","https://openalex.org/W4214866065","https://openalex.org/W4224884862","https://openalex.org/W4249932213","https://openalex.org/W4287078945","https://openalex.org/W4288325606","https://openalex.org/W4312960790"],"related_works":["https://openalex.org/W4382323155","https://openalex.org/W4315697128","https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W2982536526","https://openalex.org/W2995343971","https://openalex.org/W2992221004","https://openalex.org/W4383502363","https://openalex.org/W4292794827","https://openalex.org/W4224939635"],"abstract_inverted_index":{"Deformable":[0],"Convolutional":[1,17],"Networks":[2,19],"(DCN)":[3],"have":[4,48,110],"been":[5,50],"proposed":[6],"as":[7],"a":[8,39,131,141,145,174,204,273,281,290],"powerful":[9],"tool":[10],"to":[11,52,65,181,225,238,243,260,272,278],"boost":[12],"the":[13,29,67,85,89,103,149,162,168,182,185,189,240,244,266],"representation":[14],"power":[15],"of":[16,28,55,70,91,119,153,167,184,192,206,235,262,276,283,287],"Neural":[18],"(CNN)":[20],"in":[21,102,121,155,199,216],"computer":[22],"vision":[23,35,125],"tasks":[24],"via":[25],"adaptive":[26],"sampling":[27],"input":[30,251],"feature":[31],"map.":[32],"Much":[33],"like":[34],"transformers,":[36],"DCNs":[37,193,217],"utilize":[38],"more":[40,97,150],"flexible":[41],"inductive":[42],"bias":[43],"than":[44],"standard":[45],"CNNs":[46],"and":[47,82,194,218,280],"also":[49,231],"shown":[51,64],"improve":[53,107],"performance":[54,214],"particular":[56],"models.":[57],"For":[58],"example,":[59],"drop-in":[60],"DCN":[61,98,134,210,227,236,245,263],"layers":[62,99],"were":[63],"increase":[66],"AP":[68],"score":[69],"Mask":[71],"RCNN":[72],"by":[73,161],"10.6":[74],"points":[75],"while":[76],"introducing":[77],"only":[78],"1%":[79],"additional":[80],"parameters":[81],"FLOPs,":[83],"improving":[84],"state-of-the-art":[86],"model":[87],"at":[88],"time":[90,286],"publication.":[92],"However,":[93],"despite":[94],"evidence":[95],"that":[96,130,222,256],"placed":[100],"earlier":[101],"network":[104],"can":[105],"further":[106,117],"performance,":[108],"we":[109,201],"not":[111],"seen":[112],"this":[113],"trend":[114],"continue":[115],"with":[116],"scaling":[118],"deformations":[120,154],"CNNs,":[122,200],"unlike":[123],"for":[124,209],"transformers.":[126],"Benchmarking":[127],"experiments":[128],"show":[129,255],"realistically":[132],"sized":[133],"layer":[135,246],"(64H\u00d764W,":[136],"64":[137],"in-out":[138],"channel)":[139],"incurs":[140],"4\u00d7":[142],"slowdown":[143,191,264],"on":[144,289],"GPU":[146],"platform,":[147],"discouraging":[148],"ubiquitous":[151],"use":[152,198],"CNNs.":[156],"These":[157],"slowdowns":[158],"are":[159,223],"caused":[160],"irregular":[163],"input-dependent":[164],"access":[165],"patterns":[166],"bilinear":[169],"interpolation":[170],"operator,":[171],"which":[172],"has":[173],"disproportionately":[175],"low":[176],"arithmetic":[177],"intensity":[178],"(AI)":[179],"compared":[180],"rest":[183],"DCN.":[186],"To":[187],"address":[188],"disproportionate":[190],"enable":[195],"their":[196],"expanded":[197],"propose":[202],"DefT,":[203],"series":[205],"workload-aware":[207],"optimizations":[208],"kernels.":[211],"DefT":[212,257],"identifies":[213],"bottlenecks":[215],"fuses":[219],"specific":[220],"operators":[221],"observed":[224],"limit":[226],"AI.":[228],"Our":[229],"approach":[230],"uses":[232],"statistical":[233],"information":[234],"workloads":[237],"adapt":[239],"workload":[241],"tiling":[242],"dimensions,":[247],"minimizing":[248],"costly":[249],"out-of-boundary":[250],"accesses.":[252],"Experimental":[253],"results":[254],"mitigates":[258],"up":[259,277],"half":[261],"over":[265],"current-art":[267],"PyTorch":[268],"implementation.":[269],"This":[270],"translates":[271],"layerwise":[274],"speedup":[275],"134%":[279],"reduction":[282],"normalized":[284],"training":[285],"46%":[288],"fully":[291],"DCN-enabled":[292],"ResNet":[293],"model.":[294]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
