{"id":"https://openalex.org/W2618033804","doi":"https://doi.org/10.1109/ijcnn.2017.7966389","title":"Batch reinforcement learning on the industrial benchmark: First experiences","display_name":"Batch reinforcement learning on the industrial benchmark: First experiences","publication_year":2017,"publication_date":"2017-05-01","ids":{"openalex":"https://openalex.org/W2618033804","doi":"https://doi.org/10.1109/ijcnn.2017.7966389","mag":"2618033804"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2017.7966389","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2017.7966389","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1705.07262","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Daniel Hein","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]},{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Daniel Hein","raw_affiliation_strings":["Department of Informatics, Technische Universit\u00e4t M\u00fcnchen, Garching, Germany","Siemens AG, Corporate Technology, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technische Universit\u00e4t M\u00fcnchen, Garching, Germany","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"Siemens AG, Corporate Technology, Munich, Germany","institution_ids":["https://openalex.org/I1325886976"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Steffen Udluft","orcid":null},"institutions":[{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Steffen Udluft","raw_affiliation_strings":["Siemens AG, Corporate Technology, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Siemens AG, Corporate Technology, Munich, Germany","institution_ids":["https://openalex.org/I1325886976"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Michel Tokic","orcid":null},"institutions":[{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Michel Tokic","raw_affiliation_strings":["Siemens AG, Corporate Technology, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Siemens AG, Corporate Technology, Munich, Germany","institution_ids":["https://openalex.org/I1325886976"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Alexander Hentschel","orcid":null},"institutions":[{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexander Hentschel","raw_affiliation_strings":["Siemens AG, Corporate Technology, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Siemens AG, Corporate Technology, Munich, Germany","institution_ids":["https://openalex.org/I1325886976"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Thomas A. Runkler","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]},{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Thomas A. Runkler","raw_affiliation_strings":["Department of Informatics, Technische Universit\u00e4t M\u00fcnchen, Garching, Germany","Siemens AG, Corporate Technology, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technische Universit\u00e4t M\u00fcnchen, Garching, Germany","institution_ids":["https://openalex.org/I62916508"]},{"raw_affiliation_string":"Siemens AG, Corporate Technology, Munich, Germany","institution_ids":["https://openalex.org/I1325886976"]}]},{"author_position":"last","author":{"id":null,"display_name":"Volkmar Sterzing","orcid":null},"institutions":[{"id":"https://openalex.org/I1325886976","display_name":"Siemens (Germany)","ror":"https://ror.org/059mq0909","country_code":"DE","type":"company","lineage":["https://openalex.org/I1325886976"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Volkmar Sterzing","raw_affiliation_strings":["Siemens AG, Corporate Technology, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Siemens AG, Corporate Technology, Munich, Germany","institution_ids":["https://openalex.org/I1325886976"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1325886976","https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.6231,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.76390988,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"4214","last_page":"4221"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9771000146865845,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.963100016117096,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.864799976348877},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8185999989509583},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.6065999865531921},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5766000151634216},{"id":"https://openalex.org/keywords/particle-swarm-optimization","display_name":"Particle swarm optimization","score":0.5394999980926514},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5142999887466431}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.864799976348877},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8185999989509583},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.6065999865531921},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5766000151634216},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5720000267028809},{"id":"https://openalex.org/C85617194","wikidata":"https://www.wikidata.org/wiki/Q2072794","display_name":"Particle swarm optimization","level":2,"score":0.5394999980926514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5145999789237976},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5142999887466431},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4968999922275543},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43540000915527344},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4099999964237213},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3862000107765198},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.34380000829696655},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.33070001006126404},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.266400009393692}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn.2017.7966389","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2017.7966389","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1705.07262","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1705.07262","pdf_url":"https://arxiv.org/pdf/1705.07262","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1705.07262","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1705.07262","pdf_url":"https://arxiv.org/pdf/1705.07262","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W215404843","https://openalex.org/W1547105496","https://openalex.org/W1552327263","https://openalex.org/W1567397728","https://openalex.org/W1570106308","https://openalex.org/W1584535387","https://openalex.org/W2080362542","https://openalex.org/W2109364787","https://openalex.org/W2127412976","https://openalex.org/W2143770247","https://openalex.org/W2151702863","https://openalex.org/W2152195021","https://openalex.org/W2396820603","https://openalex.org/W3041202696","https://openalex.org/W4245296547","https://openalex.org/W6602057636","https://openalex.org/W6605834550","https://openalex.org/W6606719070","https://openalex.org/W6677737365","https://openalex.org/W6797154290"],"related_works":[],"abstract_inverted_index":{"The":[0,88],"Particle":[1],"Swarm":[2],"Optimization":[3],"Policy":[4],"(PSO-P)":[5],"has":[6],"been":[7],"recently":[8],"introduced":[9],"and":[10,33,73,85,112,162],"proven":[11],"to":[12,97,150],"produce":[13],"remarkable":[14],"results":[15,90,98,159],"on":[16,35,42,93],"interacting":[17],"with":[18],"academic":[19,129],"reinforcement":[20,50],"learning":[21,51],"benchmarks":[22],"in":[23,66,145,160,172],"an":[24],"off-policy,":[25],"batch-based":[26],"setting.":[27,148],"To":[28],"further":[29],"investigate":[30],"the":[31,43,105,113,141],"properties":[32],"feasibility":[34],"real-world":[36,134],"applications,":[37,68,136],"this":[38],"paper":[39],"investigates":[40],"PSO-P":[41,92,122,156],"so-called":[44],"Industrial":[45],"Benchmark":[46],"(IB),":[47],"a":[48,61,76,166],"novel":[49],"(RL)":[52],"benchmark":[53],"that":[54,121],"aims":[55],"at":[56],"being":[57],"realistic":[58],"by":[59],"including":[60],"variety":[62],"of":[63,91,99,126,170],"aspects":[64],"found":[65],"industrial":[67,135],"such":[69],"as":[70],"continuous":[71],"state":[72,81],"action":[74],"spaces,":[75],"high":[77],"dimensional,":[78],"partially":[79],"observable":[80],"space,":[82],"delayed":[83],"effects,":[84],"complex":[86,178],"stochasticity.":[87],"experimental":[89],"IB":[94,147],"are":[95],"compared":[96],"closed-form":[100],"control":[101],"policies":[102],"derived":[103],"from":[104],"model-based":[106],"Recurrent":[107],"Control":[108],"Neural":[109,115],"Network":[110],"(RCNN)":[111],"model-free":[114],"Fitted":[116],"Q-Iteration":[117],"(NFQ).":[118],"Experiments":[119],"show":[120],"is":[123],"not":[124],"only":[125,165],"interest":[127],"for":[128,133],"benchmarks,":[130],"but":[131],"also":[132,139],"since":[137],"it":[138],"yielded":[140],"best":[142],"performing":[143],"policy":[144],"our":[146],"Compared":[149],"other":[151],"well":[152],"established":[153],"RL":[154],"techniques,":[155],"produced":[157],"outstanding":[158],"performance":[161],"robustness,":[163],"requiring":[164],"relatively":[167],"low":[168],"amount":[169],"effort":[171],"finding":[173],"adequate":[174],"parameters":[175],"or":[176],"making":[177],"design":[179],"decisions.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2017-06-05T00:00:00"}
