{"id":"https://openalex.org/W7139959235","doi":"https://doi.org/10.1109/hipc66333.2025.00034","title":"Optimizing Deployment of Unstructured Group Convolutions for Low Latency Inference","display_name":"Optimizing Deployment of Unstructured Group Convolutions for Low Latency Inference","publication_year":2025,"publication_date":"2025-12-17","ids":{"openalex":"https://openalex.org/W7139959235","doi":"https://doi.org/10.1109/hipc66333.2025.00034"},"language":null,"primary_location":{"id":"doi:10.1109/hipc66333.2025.00034","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hipc66333.2025.00034","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 32nd International Conference on High Performance Computing, Data, and Analytics (HiPC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100435361","display_name":"Ling Li","orcid":"https://orcid.org/0000-0002-9245-7387"},"institutions":[{"id":"https://openalex.org/I58956616","display_name":"Case Western Reserve University","ror":"https://ror.org/051fd9666","country_code":"US","type":"education","lineage":["https://openalex.org/I58956616"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Changxin Li","raw_affiliation_strings":["Case Western Reserve University,Computer and Data Sciences,Cleveland,USA"],"affiliations":[{"raw_affiliation_string":"Case Western Reserve University,Computer and Data Sciences,Cleveland,USA","institution_ids":["https://openalex.org/I58956616"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015036207","display_name":"Sanmukh R. Kuppannagari","orcid":"https://orcid.org/0000-0002-2062-1483"},"institutions":[{"id":"https://openalex.org/I58956616","display_name":"Case Western Reserve University","ror":"https://ror.org/051fd9666","country_code":"US","type":"education","lineage":["https://openalex.org/I58956616"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sanmukh Kuppannagari","raw_affiliation_strings":["Case Western Reserve University,Computer and Data Sciences,Cleveland,USA"],"affiliations":[{"raw_affiliation_string":"Case Western Reserve University,Computer and Data Sciences,Cleveland,USA","institution_ids":["https://openalex.org/I58956616"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100435361"],"corresponding_institution_ids":["https://openalex.org/I58956616"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.88100786,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"257","last_page":"267"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.1785999983549118,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.1785999983549118,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.17339999973773956,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.16940000653266907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6226000189781189},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5202000141143799},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.38440001010894775},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.36489999294281006},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.32820001244544983}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7590000033378601},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6226000189781189},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5202000141143799},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41019999980926514},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3725000023841858},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32820001244544983},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27630001306533813},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hipc66333.2025.00034","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hipc66333.2025.00034","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 32nd International Conference on High Performance Computing, Data, and Analytics (HiPC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1903029394","https://openalex.org/W2046140068","https://openalex.org/W2112796928","https://openalex.org/W2194775991","https://openalex.org/W2549139847","https://openalex.org/W2963125010","https://openalex.org/W2963363373","https://openalex.org/W2963446712","https://openalex.org/W2963993763","https://openalex.org/W2965862350","https://openalex.org/W2965862774","https://openalex.org/W3107401339","https://openalex.org/W4292793947","https://openalex.org/W4318969866","https://openalex.org/W4407784688"],"related_works":[],"abstract_inverted_index":{"Group":[0],"convolutions":[1,87],"are":[2,89],"widely":[3],"adopted":[4],"in":[5,91],"modern":[6],"CNN":[7],"architectures":[8],"such":[9,94],"as":[10,95],"CondenseNet":[11,175],"and":[12,27,48,76,118,149,174,207],"ShuffleNet":[13,173],"to":[14,53,80,106,114,125,128,161,178],"enable":[15],"efficient":[16],"inference":[17],"on":[18,153,172],"GPUs.":[19],"However,":[20],"when":[21],"the":[22,82,112,129,134,154,157,164,200],"connection":[23],"pattern":[24,160],"between":[25],"input":[26],"output":[28,132],"channels":[29],"does":[30],"not":[31],"exhibit":[32],"regularity":[33],"(unstructured":[34],"group":[35,86,99],"convolution),":[36],"popular":[37,92],"deep":[38],"learning":[39],"frameworks":[40],"(e.g.,":[41],"PyTorch)":[42],"often":[43],"struggle":[44],"with":[45,71,189],"load":[46,205],"balancing":[47,206],"data":[49,208],"reuse":[50],"issues":[51],"leading":[52],"reduced":[54],"performance.":[55],"In":[56],"this":[57],"paper,":[58],"we":[59,102],"present":[60],"a":[61,67],"comprehensive":[62],"optimization":[63],"framework":[64],"that":[65,97],"combines":[66],"Knapsack-based":[68],"partitioning":[69],"approach":[70],"Integer":[72,121],"Linear":[73,122],"Programming":[74,123],"(ILP)":[75,124],"advanced":[77],"matrix":[78,141,155],"reordering":[79,142,190],"optimize":[81],"deployment":[83],"of":[84,166,202],"unstructured":[85],"which":[88],"used":[90],"models":[93],"CondenseNets":[96],"learn":[98],"connections.":[100],"Specifically,":[101],"use":[103,119],"knapsack":[104,135],"algorithm":[105],"determine":[107],"partition":[108],"(group)":[109,131],"sizes":[110],"for":[111,204],"connections":[113,127],"minimize":[115],"execution":[116],"time":[117],"an":[120,192],"assign":[126],"partitions":[130],"by":[133],"algorithm.":[136,169],"We":[137],"also":[138],"employ":[139],"three":[140],"strategies-Hierarchical":[143],"Clustering":[144,147],"(HC),":[145],"Iterative":[146],"(IC),":[148],"Reverse":[150],"Cuthill-McKee":[151],"(RCM)":[152],"representing":[156],"input-output":[158],"connectivity":[159],"further":[162],"improve":[163],"performance":[165],"our":[167],"scheduling":[168],"Our":[170],"experiments":[171],"demonstrate":[176],"up":[177],"<tex":[179,194],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[180,195],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.9":[181],"\\times$</tex>":[182,197],"speedups":[183],"over":[184],"PyTorch.":[185],"Furthermore,":[186],"augmenting":[187],"ILP":[188],"achieves":[191],"additional":[193],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.3":[196],"improvement":[198],"demonstrating":[199],"importance":[201],"optimizing":[203],"reuse.":[209]},"counts_by_year":[],"updated_date":"2026-03-22T06:25:25.174409","created_date":"2026-03-21T00:00:00"}
