{"id":"https://openalex.org/W4226414848","doi":"https://doi.org/10.1109/cdc45484.2021.9683491","title":"Efficient Reinforcement Learning in Resource Allocation Problems Through Permutation Invariant Multi-task Learning","display_name":"Efficient Reinforcement Learning in Resource Allocation Problems Through Permutation Invariant Multi-task Learning","publication_year":2021,"publication_date":"2021-12-14","ids":{"openalex":"https://openalex.org/W4226414848","doi":"https://doi.org/10.1109/cdc45484.2021.9683491"},"language":"en","primary_location":{"id":"doi:10.1109/cdc45484.2021.9683491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683491","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041572265","display_name":"Desmond Cai","orcid":"https://orcid.org/0000-0001-9207-1890"},"institutions":[{"id":"https://openalex.org/I327656168","display_name":"American Society For Testing and Materials","ror":"https://ror.org/01x994m09","country_code":"US","type":"other","lineage":["https://openalex.org/I327656168"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Desmond Cai","raw_affiliation_strings":["AStar,Singapore","AStar, Singapore"],"affiliations":[{"raw_affiliation_string":"AStar,Singapore","institution_ids":["https://openalex.org/I327656168"]},{"raw_affiliation_string":"AStar, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081958802","display_name":"Shiau Hong Lim","orcid":null},"institutions":[{"id":"https://openalex.org/I4210162116","display_name":"Singapore Clinical Research Institute","ror":"https://ror.org/05c27bs83","country_code":"SG","type":"other","lineage":["https://openalex.org/I4210162116"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shiau Hong Lim","raw_affiliation_strings":["IBM Research,Singapore","IBM Research, Singapore"],"affiliations":[{"raw_affiliation_string":"IBM Research,Singapore","institution_ids":["https://openalex.org/I4210162116"]},{"raw_affiliation_string":"IBM Research, Singapore","institution_ids":["https://openalex.org/I4210162116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070946273","display_name":"Laura Wynter","orcid":"https://orcid.org/0000-0001-5169-0214"},"institutions":[{"id":"https://openalex.org/I4210162116","display_name":"Singapore Clinical Research Institute","ror":"https://ror.org/05c27bs83","country_code":"SG","type":"other","lineage":["https://openalex.org/I4210162116"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Laura Wynter","raw_affiliation_strings":["IBM Research,Singapore","IBM Research, Singapore"],"affiliations":[{"raw_affiliation_string":"IBM Research,Singapore","institution_ids":["https://openalex.org/I4210162116"]},{"raw_affiliation_string":"IBM Research, Singapore","institution_ids":["https://openalex.org/I4210162116"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041572265"],"corresponding_institution_ids":["https://openalex.org/I327656168"],"apc_list":null,"apc_paid":null,"fwci":0.5026,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.68470742,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2270","last_page":"2275"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8551394939422607},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7761578559875488},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5789413452148438},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.541439414024353},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5155906081199646},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.5070817470550537},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.48786354064941406},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.44164806604385376},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.44146889448165894},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09214556217193604}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8551394939422607},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7761578559875488},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5789413452148438},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.541439414024353},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5155906081199646},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.5070817470550537},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.48786354064941406},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.44164806604385376},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.44146889448165894},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09214556217193604},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc45484.2021.9683491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683491","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W2104753538","https://openalex.org/W2130599357","https://openalex.org/W2134197408","https://openalex.org/W2169743339","https://openalex.org/W2201581102","https://openalex.org/W2509374375","https://openalex.org/W2735995851","https://openalex.org/W2877093712","https://openalex.org/W2891076394","https://openalex.org/W2903941123","https://openalex.org/W2923023063","https://openalex.org/W2949571867","https://openalex.org/W2963211300","https://openalex.org/W2964615009","https://openalex.org/W2977481643","https://openalex.org/W2995481444","https://openalex.org/W3022652760","https://openalex.org/W3032554070","https://openalex.org/W3121933628","https://openalex.org/W4226414848","https://openalex.org/W4289753965","https://openalex.org/W4294555834","https://openalex.org/W4294583774","https://openalex.org/W4294635920","https://openalex.org/W4298174377","https://openalex.org/W4300092207","https://openalex.org/W4319988532","https://openalex.org/W6679630895","https://openalex.org/W6680171673","https://openalex.org/W6685644109","https://openalex.org/W6685663092","https://openalex.org/W6685726866","https://openalex.org/W6687681856","https://openalex.org/W6738483526","https://openalex.org/W6740879895","https://openalex.org/W6740961973","https://openalex.org/W6749892895","https://openalex.org/W6753050254","https://openalex.org/W6754471908","https://openalex.org/W6754665250","https://openalex.org/W6760533645","https://openalex.org/W6764897204","https://openalex.org/W6768511085","https://openalex.org/W6770566355","https://openalex.org/W6849896277"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W2964765435","https://openalex.org/W4391331176"],"abstract_inverted_index":{"One":[0],"of":[1,33,71,87],"the":[2,23,42,51,69,85,88],"main":[3],"challenges":[4],"in":[5,20,41,53],"real-world":[6,93],"reinforcement":[7],"learning":[8],"is":[9],"to":[10,64],"learn":[11],"successfully":[12],"from":[13],"limited":[14],"training":[15],"samples.":[16],"We":[17,44,82],"show":[18],"that":[19],"certain":[21],"settings,":[22],"available":[24],"data":[25],"can":[26],"be":[27],"dramatically":[28],"increased":[29],"through":[30],"a":[31,46,61,78],"form":[32],"multi-task":[34,65],"learning,":[35,66],"by":[36],"exploiting":[37],"an":[38,72],"invariance":[39,100],"property":[40,101],"tasks.":[43],"provide":[45],"theoretical":[47],"performance":[48],"bound":[49],"for":[50],"gain":[52],"sample":[54],"efficiency":[55],"under":[56],"this":[57,99],"setting.":[58],"This":[59],"motivates":[60],"new":[62],"approach":[63,90],"which":[67],"involves":[68],"design":[70],"appropriate":[73],"neural":[74],"network":[75],"architecture":[76],"and":[77,106],"prioritized":[79],"task-sampling":[80],"strategy.":[81],"demonstrate":[83],"empirically":[84],"effectiveness":[86],"proposed":[89],"on":[91],"two":[92],"sequential":[94],"resource":[95],"allocation":[96],"tasks":[97],"where":[98],"occurs:":[102],"financial":[103],"portfolio":[104],"optimization":[105],"meta":[107],"federated":[108],"learning.":[109]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}