{"id":"https://openalex.org/W4206752580","doi":"https://doi.org/10.1177/02783649211056967","title":"Learning to solve sequential physical reasoning problems from a scene image","display_name":"Learning to solve sequential physical reasoning problems from a scene image","publication_year":2021,"publication_date":"2021-12-01","ids":{"openalex":"https://openalex.org/W4206752580","doi":"https://doi.org/10.1177/02783649211056967"},"language":"en","primary_location":{"id":"doi:10.1177/02783649211056967","is_oa":false,"landing_page_url":"https://doi.org/10.1177/02783649211056967","pdf_url":null,"source":{"id":"https://openalex.org/S73484101","display_name":"The International Journal of Robotics Research","issn_l":"0278-3649","issn":["0278-3649","1741-3176"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of Robotics Research","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080239685","display_name":"Danny Driess","orcid":"https://orcid.org/0000-0002-8258-1659"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]},{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Danny Driess","raw_affiliation_strings":["Learning and Intelligent Systems, TU Berlin, Germany","Max-Planck Institute for Intelligent Systems, Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Learning and Intelligent Systems, TU Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]},{"raw_affiliation_string":"Max-Planck Institute for Intelligent Systems, Stuttgart, Germany","institution_ids":["https://openalex.org/I4210135521"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003306942","display_name":"Jung-Su Ha","orcid":"https://orcid.org/0000-0002-1024-4119"},"institutions":[{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]},{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jung-Su Ha","raw_affiliation_strings":["Learning and Intelligent Systems, TU Berlin, Germany","Max-Planck Institute for Intelligent Systems, Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Learning and Intelligent Systems, TU Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]},{"raw_affiliation_string":"Max-Planck Institute for Intelligent Systems, Stuttgart, Germany","institution_ids":["https://openalex.org/I4210135521"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065672819","display_name":"Marc Toussaint","orcid":"https://orcid.org/0000-0002-5487-6767"},"institutions":[{"id":"https://openalex.org/I4210135521","display_name":"Max Planck Institute for Intelligent Systems","ror":"https://ror.org/04fq9j139","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210135521"]},{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Marc Toussaint","raw_affiliation_strings":["Learning and Intelligent Systems, TU Berlin, Germany","Max-Planck Institute for Intelligent Systems, Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Learning and Intelligent Systems, TU Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]},{"raw_affiliation_string":"Max-Planck Institute for Intelligent Systems, Stuttgart, Germany","institution_ids":["https://openalex.org/I4210135521"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5080239685"],"corresponding_institution_ids":["https://openalex.org/I4210135521","https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":1.1528,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.81091503,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"40","issue":"12-14","first_page":"1435","last_page":"1466"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5930887460708618},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.5752885937690735},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5727587342262268},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.5364760756492615},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5177921056747437},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5103968977928162},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5101533532142639},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.47937896847724915},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.46918630599975586},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45912137627601624},{"id":"https://openalex.org/keywords/kinematics","display_name":"Kinematics","score":0.4135863184928894},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3872695565223694},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37188592553138733},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.14257067441940308}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5930887460708618},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.5752885937690735},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5727587342262268},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.5364760756492615},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5177921056747437},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5103968977928162},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5101533532142639},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.47937896847724915},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.46918630599975586},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45912137627601624},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.4135863184928894},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3872695565223694},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37188592553138733},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.14257067441940308},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/02783649211056967","is_oa":false,"landing_page_url":"https://doi.org/10.1177/02783649211056967","pdf_url":null,"source":{"id":"https://openalex.org/S73484101","display_name":"The International Journal of Robotics Research","issn_l":"0278-3649","issn":["0278-3649","1741-3176"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of Robotics Research","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6000000238418579,"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W2031738727","https://openalex.org/W2057408106","https://openalex.org/W2135319191","https://openalex.org/W2141841102","https://openalex.org/W2166254514","https://openalex.org/W2172082560","https://openalex.org/W2412154694","https://openalex.org/W2528489519","https://openalex.org/W2559655401","https://openalex.org/W2569188995","https://openalex.org/W2582998992","https://openalex.org/W2593390416","https://openalex.org/W2600077159","https://openalex.org/W2612690371","https://openalex.org/W2615497679","https://openalex.org/W2736258566","https://openalex.org/W2739341730","https://openalex.org/W2788882124","https://openalex.org/W2794922736","https://openalex.org/W2805883505","https://openalex.org/W2885010347","https://openalex.org/W2913995205","https://openalex.org/W2914044489","https://openalex.org/W2937206389","https://openalex.org/W2945600613","https://openalex.org/W2963150697","https://openalex.org/W2963439114","https://openalex.org/W2963634205","https://openalex.org/W2963903510","https://openalex.org/W2964259080","https://openalex.org/W2967168619","https://openalex.org/W2968967434","https://openalex.org/W3003355693","https://openalex.org/W3004020714","https://openalex.org/W3039141704","https://openalex.org/W3088321106","https://openalex.org/W3089482831","https://openalex.org/W3090701469","https://openalex.org/W3090717212","https://openalex.org/W3090971691","https://openalex.org/W3091613793","https://openalex.org/W3205304947","https://openalex.org/W4212774754"],"related_works":["https://openalex.org/W2789522126","https://openalex.org/W2066693961","https://openalex.org/W2368363778","https://openalex.org/W122584421","https://openalex.org/W4244295168","https://openalex.org/W2753351751","https://openalex.org/W3185180338","https://openalex.org/W2889348933","https://openalex.org/W2351643838","https://openalex.org/W3033677963"],"abstract_inverted_index":{"In":[0],"this":[1,119],"article,":[2],"we":[3,122],"propose":[4],"deep":[5,124],"visual":[6,125],"reasoning,":[7],"which":[8,110],"is":[9,189],"a":[10,48,72,79,95,108,129,136,159,186,212],"convolutional":[11],"recurrent":[12],"neural":[13,137,208],"network":[14,138],"that":[15,29,146,218],"predicts":[16,140],"discrete":[17,50,69,92,142,277],"action":[18,65,93,143,278],"sequences":[19,66,144],"from":[20],"an":[21],"initial":[22,131],"scene":[23,197],"image":[24,132],"for":[25,31,238],"sequential":[26],"manipulation":[27],"problems":[28,41,101,226],"arise,":[30],"example,":[32],"in":[33,76,201,232,267],"task":[34],"and":[35,173,198],"motion":[36,57,150],"planning":[37,58,100,151],"(TAMP).":[38],"Typical":[39],"TAMP":[40,164],"are":[42],"formalized":[43],"by":[44,191],"combining":[45],"reasoning":[46],"on":[47,71,128,181],"symbolic,":[49],"level":[51],"(e.g.,":[52],"first-order":[53],"logic)":[54],"with":[55,171],"continuous":[56],"such":[59,145,227],"as":[60,204,228],"nonlinear":[61,80],"trajectory":[62,81],"optimization.":[63],"The":[64],"represent":[67],"the":[68,86,112,134,162,193,196,199,207,219,246,271,276],"decisions":[70],"symbolic":[73],"level,":[74],"which,":[75],"turn,":[77],"parameterize":[78],"optimization":[82],"problem.":[83,165],"Owing":[84],"to":[85,103,106,154,157,161,169,206,251,273],"great":[87],"combinatorial":[88,120],"complexity":[89],"of":[90,98,114,133,176,195,211,261,264],"possible":[91,190],"sequences,":[94],"large":[96],"number":[97],"optimization/motion":[99],"have":[102],"be":[104,155],"solved":[105,156],"find":[107,158],"solution,":[109],"limits":[111],"scalability":[113],"these":[115],"approaches.":[116],"To":[117],"circumvent":[118],"complexity,":[121],"introduce":[123],"reasoning:":[126],"based":[127],"segmented":[130],"scene,":[135],"directly":[139],"promising":[141],"ideally":[147],"only":[148,182,223],"one":[149],"problem":[152],"has":[153],"solution":[160],"overall":[163],"Our":[166],"method":[167],"generalizes":[168],"scenes":[170],"many":[172,268],"varying":[174],"numbers":[175],"objects,":[177],"although":[178],"being":[179],"trained":[180],"two":[183],"objects":[184,194],"at":[185],"time.":[187],"This":[188],"encoding":[192],"goal":[200],"(segmented)":[202],"images":[203],"input":[205],"network,":[209],"instead":[210],"fixed":[213],"feature":[214],"vector.":[215],"We":[216],"show":[217,258],"framework":[220],"can":[221],"not":[222],"handle":[224],"kinematic":[225],"pick-and-place":[229],"(as":[230],"typical":[231],"TAMP),":[233],"but":[234],"also":[235],"tool-use":[236],"scenarios":[237],"planar":[239],"pushing":[240],"under":[241],"quasi-static":[242],"dynamic":[243],"models.":[244],"Here,":[245],"image-based":[247],"representation":[248],"enables":[249],"generalization":[250],"other":[252],"shapes":[253],"than":[254],"during":[255],"training.":[256],"Results":[257],"runtime":[259],"improvements":[260],"several":[262],"orders":[263],"magnitudes":[265],"by,":[266],"cases,":[269],"removing":[270],"need":[272],"search":[274],"over":[275],"sequences.":[279]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
