{"id":"https://openalex.org/W4408325176","doi":"https://doi.org/10.1109/globecom52923.2024.10901686","title":"Megatuner: An Offline DCQCN Parameters Tuner For Large-scale Models","display_name":"Megatuner: An Offline DCQCN Parameters Tuner For Large-scale Models","publication_year":2024,"publication_date":"2024-12-08","ids":{"openalex":"https://openalex.org/W4408325176","doi":"https://doi.org/10.1109/globecom52923.2024.10901686"},"language":"en","primary_location":{"id":"doi:10.1109/globecom52923.2024.10901686","is_oa":false,"landing_page_url":"https://doi.org/10.1109/globecom52923.2024.10901686","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GLOBECOM 2024 - 2024 IEEE Global Communications Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101550167","display_name":"Xin Qi","orcid":"https://orcid.org/0000-0003-4572-059X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qi Xin","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100679196","display_name":"Xiaoxiang Wang","orcid":"https://orcid.org/0000-0002-2924-2295"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoxiang Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078695626","display_name":"He Liu","orcid":"https://orcid.org/0000-0001-9177-9459"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"He Liu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008653012","display_name":"Feng Jiao","orcid":"https://orcid.org/0000-0003-2590-7074"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fangzheng Jiao","raw_affiliation_strings":["Infrawaves, Co,China"],"affiliations":[{"raw_affiliation_string":"Infrawaves, Co,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034808133","display_name":"Xiaohe Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaohe Hu","raw_affiliation_strings":["Infrawaves, Co,China"],"affiliations":[{"raw_affiliation_string":"Infrawaves, Co,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100459993","display_name":"Jing Yang","orcid":"https://orcid.org/0000-0003-0164-9699"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Jing","raw_affiliation_strings":["Infrawaves, Co,China"],"affiliations":[{"raw_affiliation_string":"Infrawaves, Co,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101550167"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25956318,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3823","last_page":"3828"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7346000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7346000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tuner","display_name":"Tuner","score":0.6946632862091064},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.591830849647522},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.50364750623703},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.14156010746955872},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11690756678581238},{"id":"https://openalex.org/keywords/radio-frequency","display_name":"Radio frequency","score":0.09643849730491638}],"concepts":[{"id":"https://openalex.org/C9819579","wikidata":"https://www.wikidata.org/wiki/Q1544018","display_name":"Tuner","level":3,"score":0.6946632862091064},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.591830849647522},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.50364750623703},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.14156010746955872},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11690756678581238},{"id":"https://openalex.org/C74064498","wikidata":"https://www.wikidata.org/wiki/Q3396184","display_name":"Radio frequency","level":2,"score":0.09643849730491638},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/globecom52923.2024.10901686","is_oa":false,"landing_page_url":"https://doi.org/10.1109/globecom52923.2024.10901686","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GLOBECOM 2024 - 2024 IEEE Global Communications Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.550000011920929}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1988150362","https://openalex.org/W2440033994","https://openalex.org/W2498764059","https://openalex.org/W2968108410","https://openalex.org/W3046470751","https://openalex.org/W3192114749","https://openalex.org/W4285298657","https://openalex.org/W4386709668","https://openalex.org/W6767997687","https://openalex.org/W6784425352"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2154107929","https://openalex.org/W4290057712","https://openalex.org/W2168846948","https://openalex.org/W2559413996","https://openalex.org/W2559844932","https://openalex.org/W2185884583","https://openalex.org/W2096827166"],"abstract_inverted_index":{"As":[0],"high-performance":[1],"computing":[2,9],"clusters":[3],"(HPCC)":[4],"scale":[5],"up":[6],"and":[7,52,70,72,112,149],"single-node":[8],"power":[10],"improves,":[11],"the":[12,31,124,135,139,172],"bottle-neck":[13],"of":[14,107,127,141,174,183],"large-scale":[15,128,142],"models":[16],"training":[17,94],"is":[18,84],"shifting":[19],"from":[20],"computation":[21],"to":[22,78,97],"network":[23,54,66,184],"communication.":[24],"Datacenter":[25],"Quantized":[26],"Congestion":[27],"Notification":[28],"(DCQCN),":[29],"as":[30],"most":[32],"widely":[33],"used":[34],"congestion":[35],"control":[36],"algorithm":[37],"in":[38,82,90,150,177],"lossless":[39],"Remote":[40],"Direct":[41],"Memory":[42],"Access":[43],"(RDMA)":[44],"networks":[45],"today,":[46],"provides":[47],"HPCC":[48],"with":[49],"low-latency,":[50],"high-bandwidth,":[51],"high-quality":[53],"services.":[55],"However,":[56],"DCQCN":[57,119],"has":[58],"more":[59],"than":[60],"ten":[61],"adjustable":[62],"parameters":[63,81,120,157],"on":[64,75,134,146,166],"RDMA-enabled":[65],"inter-face":[67],"cards":[68],"(RNICs)":[69],"switches,":[71],"currently,":[73],"relying":[74],"expert":[76,167],"experience":[77],"adjust":[79],"these":[80],"engineering":[83],"inefficient.":[85],"Additionally,":[86],"even":[87],"minor":[88],"increases":[89],"single-iteration":[91],"time":[92],"during":[93],"can":[95],"lead":[96],"significant":[98],"resource":[99],"wastage.":[100],"Therefore,":[101],"after":[102],"conducting":[103],"an":[104],"in-depth":[105],"analysis":[106],"different":[108],"heuristic":[109],"search":[110],"algorithms":[111],"loss":[113],"functions,":[114],"we":[115],"propose":[116],"Megatuner\u2014an":[117],"offline":[118],"tuner":[121],"tailored":[122],"for":[123],"traffic":[125,132],"pattern":[126],"models.":[129],"We":[130,169],"conducted":[131],"simulations":[133],"collective":[136],"communications":[137],"at":[138],"foundation":[140],"models,":[143],"testing":[144],"both":[145],"simulation":[147],"platforms":[148],"real":[151,178],"environments.":[152],"The":[153],"results":[154],"show":[155],"that":[156],"tuned":[158],"by":[159],"Megatuner":[160],"significantly":[161],"outperform":[162],"those":[163],"set":[164],"based":[165],"experience.":[168],"also":[170],"addresses":[171],"issue":[173],"bandwidth":[175],"degradation":[176],"environments,":[179],"ensuring":[180],"long-term":[181],"optimization":[182],"performance.":[185]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
