{"id":"https://openalex.org/W7161728371","doi":"https://doi.org/10.48550/arxiv.2605.18109","title":"TaskGround: Structured Executable Task Inference for Full-Scene Household Reasoning","display_name":"TaskGround: Structured Executable Task Inference for Full-Scene Household Reasoning","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161728371","doi":"https://doi.org/10.48550/arxiv.2605.18109"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.18109","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18109","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.18109","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136488662","display_name":"ZhiYuan Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, ZhiYuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136459336","display_name":"Yu Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136481198","display_name":"Ruichuan An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Ruichuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136504320","display_name":"Zhenhua Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zhenhua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136504880","display_name":"Qixiu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Qixiu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136498182","display_name":"Keming Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Keming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136489071","display_name":"Zhiying Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Zhiying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136504822","display_name":"Weijie Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Weijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057219846","display_name":"Haoxiao Wang","orcid":"https://orcid.org/0009-0000-4252-4353"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Haoxiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136497763","display_name":"Shuang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Shuang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136483460","display_name":"Sicheng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Sicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100678419","display_name":"Yaobo Liang","orcid":"https://orcid.org/0000-0002-6595-5145"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yaobo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136467223","display_name":"Jiaolong Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jiaolong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136462220","display_name":"Baining Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Baining","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6133000254631042,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6133000254631042,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06430000066757202,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.0478999987244606,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.8770999908447266},{"id":"https://openalex.org/keywords/situated","display_name":"Situated","score":0.7937999963760376},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7355999946594238},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.675000011920929},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6383000016212463},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4636000096797943}],"concepts":[{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.8770999908447266},{"id":"https://openalex.org/C132829578","wikidata":"https://www.wikidata.org/wiki/Q581151","display_name":"Situated","level":2,"score":0.7937999963760376},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7355999946594238},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7146000266075134},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.675000011920929},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6383000016212463},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4636000096797943},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4440999925136566},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3971000015735626},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39070001244544983},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.35670000314712524},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.28380000591278076},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2624000012874603}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.18109","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18109","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.18109","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18109","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,98],"real":[1],"home":[2],"deployments,":[3],"household":[4,12,17,53,58,63,85,165,225,242],"agents":[5,29],"must":[6,67],"often":[7],"operate":[8],"from":[9,21,42],"a":[10,15,22,56,61,74,125,159,220],"complete":[11,57,84,133],"scene":[13,45,59,138],"and":[14,38,60,96,108,127,144,171,174,190,227],"situated":[16,62],"request,":[18,64],"rather":[19],"than":[20],"clean":[23],"task":[24,36,70,142,181],"specification.":[25],"Such":[26],"requests":[27],"require":[28],"to":[30,211],"identify":[31,215],"task-relevant":[32,137],"entities,":[33],"recover":[34],"intended":[35],"conditions,":[37],"resolve":[39],"ordering":[40],"constraints":[41],"the":[43],"surrounding":[44],"context.":[46],"We":[47,122],"formalize":[48],"this":[49,101,154],"capability":[50],"as":[51,219],"full-scene":[52,224],"reasoning:":[54],"given":[55],"an":[65],"agent":[66],"infer":[68],"executable":[69,141,216],"structure":[71],"before":[72],"producing":[73],"grounded":[75,148],"skill-level":[76,149],"action":[77,150],"sequence.":[78],"This":[79],"setting":[80],"is":[81,103],"challenging":[82],"because":[83],"scenes":[86,134],"contain":[87],"substantial":[88],"task-irrelevant":[89],"information,":[90],"making":[91],"direct":[92,201],"complete-scene":[93,202],"prompting":[94,203],"inefficient":[95],"error-prone.":[97],"practical":[99,241],"deployment,":[100],"challenge":[102],"further":[104],"amplified":[105],"by":[106,184,209],"privacy":[107],"local":[109,235],"compute":[110],"constraints,":[111],"which":[112],"favor":[113],"compact":[114,136,234],"open-weight":[115,191],"models":[116,236],"with":[117,198],"limited":[118],"long-context":[119],"reasoning":[120,226],"ability.":[121],"propose":[123],"TaskGround,":[124],"training-free":[126],"model-agnostic":[128],"Ground-Infer-Execute":[129],"framework":[130],"that":[131,229],"grounds":[132],"into":[135,147],"slices,":[139],"infers":[140],"structure,":[143],"compiles":[145],"it":[146,194],"sequences.":[151],"To":[152],"evaluate":[153],"setting,":[155],"we":[156],"introduce":[157],"FullHome,":[158,178],"human-validated":[160],"evaluation":[161],"suite":[162],"of":[163],"400":[164],"tasks":[166],"spanning":[167],"diverse":[168],"home-scale":[169],"environments":[170],"both":[172,188],"goal-oriented":[173],"process-constrained":[175],"requirements.":[176],"On":[177],"TaskGround":[179],"improves":[180],"success":[182],"rates":[183],"large":[185],"margins":[186],"across":[187],"proprietary":[189],"models.":[192],"Notably,":[193],"makes":[195],"Qwen3.5-9B":[196],"competitive":[197],"GPT-5":[199],"under":[200],"while":[204],"reducing":[205],"total":[206],"input-token":[207],"cost":[208],"up":[210],"18x.":[212],"Our":[213],"results":[214],"task-structure":[217],"inference":[218],"central":[221],"bottleneck":[222],"in":[223],"show":[228],"structured":[230],"grounding":[231],"can":[232],"make":[233],"substantially":[237],"more":[238],"effective":[239],"for":[240],"deployment.":[243]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
