{"id":"https://openalex.org/W4405490700","doi":"https://doi.org/10.1109/iccp63557.2024.10792984","title":"Policy Transfer with Maximum Entropy Deep Reinforcement Learning for Real Cart-Pole Control","display_name":"Policy Transfer with Maximum Entropy Deep Reinforcement Learning for Real Cart-Pole Control","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4405490700","doi":"https://doi.org/10.1109/iccp63557.2024.10792984"},"language":"en","primary_location":{"id":"doi:10.1109/iccp63557.2024.10792984","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp63557.2024.10792984","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093236710","display_name":"Tudor-Andrei Paleu","orcid":"https://orcid.org/0009-0009-1955-9588"},"institutions":[{"id":"https://openalex.org/I4210108695","display_name":"Gheorghe Asachi Technical University of Ia\u0219i","ror":"https://ror.org/014zxnz40","country_code":"RO","type":"education","lineage":["https://openalex.org/I4210108695"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Tudor-Andrei Paleu","raw_affiliation_strings":["Gheorghe Asachi Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania,700050"],"affiliations":[{"raw_affiliation_string":"Gheorghe Asachi Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania,700050","institution_ids":["https://openalex.org/I4210108695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003446848","display_name":"Adrian Burlacu","orcid":"https://orcid.org/0000-0003-3844-1808"},"institutions":[{"id":"https://openalex.org/I4210108695","display_name":"Gheorghe Asachi Technical University of Ia\u0219i","ror":"https://ror.org/014zxnz40","country_code":"RO","type":"education","lineage":["https://openalex.org/I4210108695"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Adrian Burlacu","raw_affiliation_strings":["Gheorghe Asachi Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania,700050"],"affiliations":[{"raw_affiliation_string":"Gheorghe Asachi Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania,700050","institution_ids":["https://openalex.org/I4210108695"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021649946","display_name":"Carlos Pascal","orcid":"https://orcid.org/0000-0002-2685-0105"},"institutions":[{"id":"https://openalex.org/I4210108695","display_name":"Gheorghe Asachi Technical University of Ia\u0219i","ror":"https://ror.org/014zxnz40","country_code":"RO","type":"education","lineage":["https://openalex.org/I4210108695"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Carlos Pascal","raw_affiliation_strings":["Gheorghe Asachi Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania,700050"],"affiliations":[{"raw_affiliation_string":"Gheorghe Asachi Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania,700050","institution_ids":["https://openalex.org/I4210108695"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5093236710"],"corresponding_institution_ids":["https://openalex.org/I4210108695"],"apc_list":null,"apc_paid":null,"fwci":0.6502,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69155959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10805","display_name":"Vehicle Dynamics and Control Systems","score":0.9674000144004822,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10805","display_name":"Vehicle Dynamics and Control Systems","score":0.9674000144004822,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10808","display_name":"Electric and Hybrid Vehicle Technologies","score":0.9505000114440918,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.9200000166893005,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6612276434898376},{"id":"https://openalex.org/keywords/cart","display_name":"Cart","score":0.6057758331298828},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5677968859672546},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.504287600517273},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.47285130620002747},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.46765655279159546},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4591583013534546},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4132162928581238},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1900726556777954}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6612276434898376},{"id":"https://openalex.org/C2777275308","wikidata":"https://www.wikidata.org/wiki/Q234668","display_name":"Cart","level":2,"score":0.6057758331298828},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5677968859672546},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.504287600517273},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.47285130620002747},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.46765655279159546},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4591583013534546},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4132162928581238},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1900726556777954},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccp63557.2024.10792984","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp63557.2024.10792984","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2754517384","https://openalex.org/W2907537824","https://openalex.org/W2921254774","https://openalex.org/W2964114602","https://openalex.org/W2967727187","https://openalex.org/W2999862950","https://openalex.org/W3110981250","https://openalex.org/W3126321819","https://openalex.org/W3137070631","https://openalex.org/W3206620955","https://openalex.org/W3212380917","https://openalex.org/W4221155471","https://openalex.org/W4383112908","https://openalex.org/W4386285856","https://openalex.org/W4388563503","https://openalex.org/W6680657880","https://openalex.org/W6734517396","https://openalex.org/W6747473740","https://openalex.org/W6780559895","https://openalex.org/W6841950565","https://openalex.org/W6858494370"],"related_works":["https://openalex.org/W4400093351","https://openalex.org/W3193909393","https://openalex.org/W2322350723","https://openalex.org/W2618011907","https://openalex.org/W4226229889","https://openalex.org/W2945144341","https://openalex.org/W4229981964","https://openalex.org/W2974098477","https://openalex.org/W3126687991","https://openalex.org/W2258783214"],"abstract_inverted_index":{"Transferring":[0],"learned":[1],"controllers":[2,33,177],"from":[3,94],"simulation":[4,95],"to":[5,28,54,58,77,82,96,128,159,171],"real-world":[6],"domains":[7],"is":[8],"rarely":[9],"observed":[10],"for":[11,44,67,150],"deep":[12,36,89,113],"reinforcement":[13,37,90],"learning":[14,38,91,186],"applications,":[15],"even":[16],"physics-based":[17],"tasks":[18],"like":[19],"the":[20,29,63,105,111,124,129,133,141,153,161,166,172,179,185],"inverted":[21],"pendulum":[22],"being":[23],"particularly":[24],"difficult":[25],"when":[26],"transitioning":[27],"physical":[30],"world.":[31],"Model-free":[32],"based":[34],"on":[35,123],"have":[39],"been":[40],"a":[41,83,88,97,109,146],"successful":[42],"choice":[43],"many":[45],"tasks.":[46],"They":[47],"are":[48,188],"preferred":[49],"over":[50],"model-based":[51],"ones":[52],"due":[53],"their":[55],"adaptive":[56],"capability":[57],"situations":[59],"not":[60],"included":[61],"in":[62,72,184],"initial":[64],"design":[65,162],"and":[66,136,164,178,190],"requiring":[68],"less":[69],"advanced":[70],"expertise":[71],"control.":[73,167],"This":[74],"paper":[75],"aims":[76],"apply":[78],"learning-based":[79],"model-free":[80,117],"control":[81],"real":[84,98,142],"cart-pole":[85],"system":[86],"using":[87],"policy":[92,156],"transferred":[93,158],"environment":[99,126],"without":[100],"any":[101],"fine-tuning":[102],"involved":[103],"with":[104],"dynamics":[106],"model.":[107],"In":[108,140],"simulation,":[110],"baseline":[112],"Q-learning,":[114,131],"an":[115,120],"off-policy":[116],"algorithm,":[118],"delivered":[119],"inferior":[121],"performance":[122],"Cart-Pole":[125],"compared":[127],"soft":[130,154],"keeping":[132],"same":[134],"hyper-parameters":[135],"ensuring":[137],"reproducible":[138],"outcomes.":[139],"experimental":[143],"setup,":[144],"besides":[145],"PD":[147,176],"controller":[148],"feasible":[149],"under-actuated":[151],"systems,":[152],"version":[155],"was":[157],"accelerate":[160],"process":[163],"enhance":[165],"Additionally,":[168],"limitations":[169],"related":[170],"brittle":[173],"robustness":[174],"of":[175,181],"absence":[180],"accurate":[182],"models":[183],"phase":[187],"studied":[189],"addressed.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
