{"id":"https://openalex.org/W7084151283","doi":"https://doi.org/10.23919/mva65244.2025.11175051","title":"Pre-Manipulation Alignment Prediction for Open-Vocabulary Object Manipulation Based on End-Effector Trajectories","display_name":"Pre-Manipulation Alignment Prediction for Open-Vocabulary Object Manipulation Based on End-Effector Trajectories","publication_year":2025,"publication_date":"2025-07-26","ids":{"openalex":"https://openalex.org/W7084151283","doi":"https://doi.org/10.23919/mva65244.2025.11175051"},"language":"en","primary_location":{"id":"doi:10.23919/mva65244.2025.11175051","is_oa":false,"landing_page_url":"https://doi.org/10.23919/mva65244.2025.11175051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 19th International Conference on Machine Vision and Applications (MVA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Motonari Kambara","orcid":null},"institutions":[{"id":"https://openalex.org/I203951103","display_name":"Keio University","ror":"https://ror.org/02kn6nx58","country_code":"JP","type":"education","lineage":["https://openalex.org/I203951103"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Motonari Kambara","raw_affiliation_strings":["Keio University,Yokohama,Japan"],"affiliations":[{"raw_affiliation_string":"Keio University,Yokohama,Japan","institution_ids":["https://openalex.org/I203951103"]}]},{"author_position":"last","author":{"id":null,"display_name":"Komei Sugiura","orcid":null},"institutions":[{"id":"https://openalex.org/I203951103","display_name":"Keio University","ror":"https://ror.org/02kn6nx58","country_code":"JP","type":"education","lineage":["https://openalex.org/I203951103"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Komei Sugiura","raw_affiliation_strings":["Keio University,Yokohama,Japan"],"affiliations":[{"raw_affiliation_string":"Keio University,Yokohama,Japan","institution_ids":["https://openalex.org/I203951103"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I203951103"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.77202639,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.004100000020116568,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.0020000000949949026,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6881999969482422},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.6863999962806702},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6797999739646912},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6481999754905701},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.6150000095367432},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.44179999828338623},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4390000104904175},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.41940000653266907}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7893000245094299},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6881999969482422},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.6863999962806702},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6797999739646912},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6481999754905701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6381000280380249},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.6150000095367432},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.44179999828338623},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4390000104904175},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41990000009536743},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.41940000653266907},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.39169999957084656},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38510000705718994},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.3580000102519989},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3271999955177307},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.27090001106262207},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2630999982357025},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C70136482","wikidata":"https://www.wikidata.org/wiki/Q13583781","display_name":"A-weighting","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.2558000087738037},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/mva65244.2025.11175051","is_oa":false,"landing_page_url":"https://doi.org/10.23919/mva65244.2025.11175051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 19th International Conference on Machine Vision and Applications (MVA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47060608863830566,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"study":[1],"addresses":[2],"a":[3,11,15,26,46,106,113,133],"task":[4,42],"designed":[5],"to":[6,72,76,83,99,121],"predict":[7,100],"the":[8,31,40,51,59,77,81,92,96,110,137],"alignment":[9],"between":[10,89],"natural":[12,65],"language":[13,66],"instruction,":[14],"pre-manipulation":[16],"image,":[17],"and":[18,62,87,91],"an":[19],"end-effector":[20],"trajectory.":[21],"Conventional":[22],"methods":[23],"typically":[24],"perform":[25],"success":[27,54],"prediction":[28,52],"only":[29],"after":[30],"manipulation":[32,101,119],"is":[33],"executed,":[34],"limiting":[35],"their":[36],"efficiency":[37],"in":[38],"executing":[39],"entire":[41],"sequence.":[43],"We":[44,68,104],"propose":[45],"novel":[47],"approach":[48],"that":[49,129],"enables":[50],"of":[53],"or":[55],"failure":[56],"by":[57],"aligning":[58],"given":[60],"trajectories":[61],"images":[63],"with":[64],"instructions.":[67],"introduce":[69],"Trajectory":[70],"Encoder":[71],"apply":[73],"learnable":[74],"weighting":[75],"input":[78],"trajectories,":[79],"allowing":[80],"model":[82],"consider":[84],"temporal":[85],"dynamics":[86],"interactions":[88],"objects":[90],"end":[93],"effector,":[94],"improving":[95],"model\u2019s":[97],"ability":[98],"outcomes":[102],"accurately.":[103],"constructed":[105],"dataset":[107],"based":[108],"on":[109],"RT-1":[111],"dataset,":[112],"large-scale":[114],"benchmark":[115],"for":[116],"open-vocabulary":[117],"object":[118],"tasks,":[120],"evaluate":[122],"our":[123,130],"method.":[124],"The":[125],"experimental":[126],"results":[127],"show":[128],"method":[131],"achieved":[132],"higher":[134],"accuracy":[135],"than":[136],"baselines.":[138]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
