{"id":"https://openalex.org/W4416749193","doi":"https://doi.org/10.1109/iros60139.2025.11246145","title":"SORT3D: Spatial Object-centric Reasoning Toolbox for Zero-Shot 3D Grounding Using Large Language Models","display_name":"SORT3D: Spatial Object-centric Reasoning Toolbox for Zero-Shot 3D Grounding Using Large Language Models","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749193","doi":"https://doi.org/10.1109/iros60139.2025.11246145"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246145","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246145","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114638248","display_name":"Nader Zantout","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nader Zantout","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute,Pittsburgh,PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073361709","display_name":"Haochen Zhang","orcid":"https://orcid.org/0009-0008-2933-2240"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haochen Zhang","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute,Pittsburgh,PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093192252","display_name":"Pujith Kachana","orcid":"https://orcid.org/0009-0000-0671-5902"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pujith Kachana","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute,Pittsburgh,PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103165731","display_name":"Judy Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinkai Qiu","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute,Pittsburgh,PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112150731","display_name":"Guofei Chen","orcid":"https://orcid.org/0000-0003-2593-3395"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guofei Chen","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute,Pittsburgh,PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101787171","display_name":"Ji Zhang","orcid":"https://orcid.org/0000-0001-6141-239X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ji Zhang","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute,Pittsburgh,PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101634791","display_name":"Wenshan Wang","orcid":"https://orcid.org/0000-0001-7801-3803"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenshan Wang","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute,Pittsburgh,PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5114638248"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":2.856,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.92542566,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2201","last_page":"2208"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9442999958992004,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9442999958992004,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.007400000002235174,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.007400000002235174,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/toolbox","display_name":"Toolbox","score":0.6952000260353088},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6132000088691711},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5929999947547913},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.515999972820282},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.508899986743927},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4472000002861023},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.43959999084472656},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.41839998960494995},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3971000015735626}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7878000140190125},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.6952000260353088},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6132000088691711},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5929999947547913},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5795000195503235},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.515999972820282},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.508899986743927},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.41839998960494995},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3971000015735626},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39259999990463257},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.37779998779296875},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3695000112056732},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.35929998755455017},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.35190001130104065},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3280999958515167},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29600000381469727},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C174252522","wikidata":"https://www.wikidata.org/wiki/Q3816772","display_name":"Natural language user interface","level":3,"score":0.28040000796318054},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C94922259","wikidata":"https://www.wikidata.org/wiki/Q33215","display_name":"Constructed language","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2554999887943268},{"id":"https://openalex.org/C20894473","wikidata":"https://www.wikidata.org/wiki/Q1116105","display_name":"Object model","level":3,"score":0.25540000200271606},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2542000114917755},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246145","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246145","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2594519801","https://openalex.org/W2964339842","https://openalex.org/W3095974555","https://openalex.org/W3107521863","https://openalex.org/W4312852845","https://openalex.org/W4313162371","https://openalex.org/W4386075724","https://openalex.org/W4390872495","https://openalex.org/W4393154152","https://openalex.org/W4401416853","https://openalex.org/W4402754162","https://openalex.org/W4403600470","https://openalex.org/W4405785140","https://openalex.org/W4408841099","https://openalex.org/W4413144643","https://openalex.org/W4413947072"],"related_works":[],"abstract_inverted_index":{"Interpreting":[0],"object-referential":[1],"language":[2,42,54,105],"and":[3,11,37,70,93,122,158],"grounding":[4,141],"objects":[5],"in":[6,45],"3D":[7,47],"with":[8,100],"spatial":[9,97],"relations":[10],"attributes":[12,89],"is":[13,23,57,61,181],"essential":[14],"for":[15,63,120,166,177],"robots":[16],"operating":[17],"alongside":[18],"humans.":[19],"However,":[20],"this":[21],"task":[22],"often":[24],"challenging":[25],"due":[26],"to":[27,65,73,108,127,151],"the":[28,46,101,149,178],"diversity":[29],"of":[30,34,41,52,103],"scenes,":[31],"large":[32,50,104],"number":[33],"fine-grained":[35],"objects,":[36],"complex":[38,139],"free-form":[39],"nature":[40],"references.":[43],"Furthermore,":[44],"domain,":[48],"obtaining":[49],"amounts":[51],"natural":[53],"training":[55,121],"data":[56,69,92,119],"difficult.":[58],"Thus,":[59],"it":[60],"important":[62],"methods":[64],"learn":[66],"from":[67,90],"little":[68],"zero-shot":[71,126,136],"generalize":[72],"new":[74],"environments.":[75,129,173],"To":[76],"address":[77],"these":[78],"challenges,":[79],"we":[80],"propose":[81],"SORT3D,":[82],"an":[83],"approach":[84,162],"that":[85,132,160],"utilizes":[86],"rich":[87],"object":[88],"2D":[91],"merges":[94],"a":[95],"heuristics-based":[96],"reasoning":[98],"toolbox":[99],"ability":[102],"models":[106],"(LLMs)":[107],"perform":[109],"sequential":[110],"reasoning.":[111],"Importantly,":[112],"our":[113,161],"method":[114],"does":[115],"not":[116],"require":[117],"text-to-3D":[118],"can":[123,163],"be":[124,164],"applied":[125],"unseen":[128,171],"We":[130,146],"show":[131],"SORT3D":[133],"achieves":[134],"state-of-the-art":[135],"performance":[137],"on":[138,143,154,169],"view-dependent":[140],"tasks":[142],"two":[144,155],"benchmarks.":[145],"also":[147],"implement":[148],"pipeline":[150,180],"run":[152],"real-time":[153],"autonomous":[156],"vehicles":[157],"demonstrate":[159],"used":[165],"object-goal":[167],"navigation":[168],"previously":[170],"real-world":[172],"All":[174],"source":[175],"code":[176],"system":[179],"publicly":[182],"released.":[183],"<sup":[184],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[185],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[186]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-28T00:00:00"}
