{"id":"https://openalex.org/W3132045372","doi":"https://doi.org/10.1109/iros45743.2020.9341390","title":"Sample-Efficient Learning for Industrial Assembly using Qgraph-bounded DDPG","display_name":"Sample-Efficient Learning for Industrial Assembly using Qgraph-bounded DDPG","publication_year":2020,"publication_date":"2020-10-24","ids":{"openalex":"https://openalex.org/W3132045372","doi":"https://doi.org/10.1109/iros45743.2020.9341390","mag":"3132045372"},"language":"en","primary_location":{"id":"doi:10.1109/iros45743.2020.9341390","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros45743.2020.9341390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091621505","display_name":"Sabrina Hoppe","orcid":"https://orcid.org/0000-0001-6958-8015"},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]},{"id":"https://openalex.org/I889804353","display_name":"Robert Bosch (Germany)","ror":"https://ror.org/01fe0jt45","country_code":"DE","type":"company","lineage":["https://openalex.org/I889804353"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Sabrina Hoppe","raw_affiliation_strings":["Bosch Corporate Research, Renningen, Germany","University of Stuttgart, Germany","Bosch Corporate Research, Renningen, Germany; University of Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Bosch Corporate Research, Renningen, Germany","institution_ids":["https://openalex.org/I889804353"]},{"raw_affiliation_string":"University of Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]},{"raw_affiliation_string":"Bosch Corporate Research, Renningen, Germany; University of Stuttgart, Germany","institution_ids":["https://openalex.org/I889804353","https://openalex.org/I100066346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086334164","display_name":"Markus Giftthaler","orcid":"https://orcid.org/0000-0002-1839-9449"},"institutions":[{"id":"https://openalex.org/I889804353","display_name":"Robert Bosch (Germany)","ror":"https://ror.org/01fe0jt45","country_code":"DE","type":"company","lineage":["https://openalex.org/I889804353"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Markus Giftthaler","raw_affiliation_strings":["Bosch Center for Artificial Intelligence, Renningen, Germany","[Bosch Center for Artificial Intelligence, Renningen, Germany]"],"affiliations":[{"raw_affiliation_string":"Bosch Center for Artificial Intelligence, Renningen, Germany","institution_ids":["https://openalex.org/I889804353"]},{"raw_affiliation_string":"[Bosch Center for Artificial Intelligence, Renningen, Germany]","institution_ids":["https://openalex.org/I889804353"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035020090","display_name":"Robert Krug","orcid":null},"institutions":[{"id":"https://openalex.org/I889804353","display_name":"Robert Bosch (Germany)","ror":"https://ror.org/01fe0jt45","country_code":"DE","type":"company","lineage":["https://openalex.org/I889804353"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Robert Krug","raw_affiliation_strings":["Bosch Corporate Research, Renningen, Germany","Bosch Corporate Research,Renningen,Germany"],"affiliations":[{"raw_affiliation_string":"Bosch Corporate Research, Renningen, Germany","institution_ids":["https://openalex.org/I889804353"]},{"raw_affiliation_string":"Bosch Corporate Research,Renningen,Germany","institution_ids":["https://openalex.org/I889804353"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065672819","display_name":"Marc Toussaint","orcid":"https://orcid.org/0000-0002-5487-6767"},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Marc Toussaint","raw_affiliation_strings":["University of Stuttgart, Germany","Univ. of Stuttgart (Germany)"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]},{"raw_affiliation_string":"Univ. of Stuttgart (Germany)","institution_ids":["https://openalex.org/I100066346"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5091621505"],"corresponding_institution_ids":["https://openalex.org/I100066346","https://openalex.org/I889804353"],"apc_list":null,"apc_paid":null,"fwci":1.0605,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.82791483,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"9080","last_page":"9087"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.8092215061187744},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7859596610069275},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7020732164382935},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6167759895324707},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5848649740219116},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5425037741661072},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.5244491696357727},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.49061575531959534},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4871639907360077},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.4601929485797882},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12986692786216736},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08106407523155212}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.8092215061187744},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7859596610069275},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7020732164382935},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6167759895324707},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5848649740219116},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5425037741661072},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.5244491696357727},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.49061575531959534},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4871639907360077},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.4601929485797882},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12986692786216736},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08106407523155212},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros45743.2020.9341390","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros45743.2020.9341390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W15078749","https://openalex.org/W92261402","https://openalex.org/W1646707810","https://openalex.org/W1757796397","https://openalex.org/W1967948264","https://openalex.org/W1977655452","https://openalex.org/W1978549191","https://openalex.org/W2105360813","https://openalex.org/W2110697446","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2173248099","https://openalex.org/W2587103201","https://openalex.org/W2623491082","https://openalex.org/W2754517384","https://openalex.org/W2762453223","https://openalex.org/W2783824884","https://openalex.org/W2902098903","https://openalex.org/W2904246096","https://openalex.org/W2905364877","https://openalex.org/W2911087563","https://openalex.org/W2911114506","https://openalex.org/W2913469662","https://openalex.org/W2924131335","https://openalex.org/W2959488596","https://openalex.org/W2962736495","https://openalex.org/W2963120839","https://openalex.org/W2963196832","https://openalex.org/W2963859097","https://openalex.org/W2963864421","https://openalex.org/W2963940579","https://openalex.org/W2964161785","https://openalex.org/W2964333597","https://openalex.org/W2967717386","https://openalex.org/W2967727187","https://openalex.org/W3004055318","https://openalex.org/W3099530328","https://openalex.org/W3130717831","https://openalex.org/W3136620014","https://openalex.org/W3158726102","https://openalex.org/W4287719180","https://openalex.org/W4288319859","https://openalex.org/W4289115845","https://openalex.org/W4294310005","https://openalex.org/W4298857966","https://openalex.org/W6637967152","https://openalex.org/W6682849425","https://openalex.org/W6684921986","https://openalex.org/W6739193204","https://openalex.org/W6745510923","https://openalex.org/W6745511974","https://openalex.org/W6745540326","https://openalex.org/W6756848961","https://openalex.org/W6757592117","https://openalex.org/W6758873523","https://openalex.org/W6760724441","https://openalex.org/W6764053384","https://openalex.org/W6764800732","https://openalex.org/W6773139877"],"related_works":["https://openalex.org/W4281847915","https://openalex.org/W3000635674","https://openalex.org/W4400868993","https://openalex.org/W2145363145","https://openalex.org/W2341346307","https://openalex.org/W2154399718","https://openalex.org/W4321463377","https://openalex.org/W4384574988","https://openalex.org/W2768629321","https://openalex.org/W2130711276"],"abstract_inverted_index":{"Recent":[0],"progress":[1],"in":[2,79,118,181],"deep":[3],"reinforcement":[4,72],"learning":[5,73,143],"has":[6],"enabled":[7],"agents":[8],"to":[9,88],"autonomously":[10],"learn":[11],"complex":[12],"control":[13],"strategies":[14],"from":[15],"scratch.":[16],"Model-free":[17],"approaches":[18],"like":[19],"Deep":[20],"Deterministic":[21],"Policy":[22],"Gradients":[23],"(DDPG)":[24],"seem":[25],"promising":[26],"for":[27,54,69],"applications":[28],"with":[29,74,158],"intricate":[30],"dynamics,":[31],"such":[32],"as":[33],"contact-rich":[34],"manipulation":[35],"tasks.":[36],"However,":[37],"these":[38],"methods":[39],"typically":[40],"require":[41],"large":[42,129],"amounts":[43],"of":[44,131,139,154,171],"training":[45],"data":[46,175],"or":[47],"meticulous":[48],"hyperparameter":[49],"tuning,":[50],"limiting":[51],"their":[52],"usefulness":[53],"real-world":[55,91],"robotics":[56],"applications.":[57],"In":[58],"this":[59],"paper,":[60],"we":[61],"evaluate":[62],"and":[63,95,115,133,137,156,178],"benchmark":[64],"our":[65],"recently":[66],"proposed":[67],"approach":[68],"improving":[70],"model-free":[71],"DDPG":[75],"through":[76],"Qgraph-based":[77],"bounds":[78],"temporal":[80],"difference":[81],"learning.":[82],"We":[83,121],"directly":[84],"apply":[85],"the":[86,105,135,142,163,167,172],"algorithm":[87],"a":[89,128],"challenging":[90],"industrial":[92],"insertion":[93,106],"task":[94,107],"assess":[96],"its":[97],"performance":[98,138],"(see":[99],"https://youtu.be/Z_GcNbCWE-E).":[100],"Empirical":[101],"results":[102,165],"show":[103],"that":[104],"can":[108,145],"be":[109,146],"learned":[110],"despite":[111],"significant":[112],"frictional":[113],"forces":[114],"uncertainty,":[116],"even":[117],"sparse-reward":[119],"settings.":[120],"present":[122],"an":[123],"in-depth":[124],"comparison":[125],"based":[126],"on":[127],"number":[130],"experiments":[132],"demonstrate":[134],"advantages":[136],"Qgraph-bounded":[140],"DDPG:":[141],"process":[144],"significantly":[147],"sped":[148],"up,":[149],"robustified":[150],"against":[151],"bad":[152],"choices":[153],"hyperparameters":[155],"runs":[157],"less":[159],"memory":[160],"requirements.":[161],"Lastly,":[162],"presented":[164],"extend":[166],"current":[168],"theoretical":[169],"understanding":[170],"link":[173],"between":[174],"graph":[176],"structure":[177],"soft":[179],"divergence":[180],"DDPG.":[182]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
