{"id":"https://openalex.org/W6967228463","doi":"https://doi.org/10.48550/arxiv.2508.00589","title":"Context-based Motion Retrieval using Open Vocabulary Methods for Autonomous Driving","display_name":"Context-based Motion Retrieval using Open Vocabulary Methods for Autonomous Driving","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W6967228463","doi":"https://doi.org/10.48550/arxiv.2508.00589"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2508.00589","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.00589","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2508.00589","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Englmeier, Stefan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Englmeier, Stefan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"B\u00fcttner, Max A.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"B\u00fcttner, Max A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Winter, Katharina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winter, Katharina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Flohr, Fabian B.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Flohr, Fabian B.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.532800018787384,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.532800018787384,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.1858000010251999,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.10480000078678131,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.6427000164985657},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6157000064849854},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5945000052452087},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5145000219345093},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.46650001406669617},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.42739999294281006},{"id":"https://openalex.org/keywords/orientation","display_name":"Orientation (vector space)","score":0.37959998846054077},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.37779998779296875}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8104000091552734},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6646000146865845},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.6427000164985657},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6157000064849854},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5945000052452087},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5378999710083008},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5145000219345093},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.46650001406669617},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.42739999294281006},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.37779998779296875},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.3440999984741211},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.3246000111103058},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.29910001158714294},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C2986578859","wikidata":"https://www.wikidata.org/wiki/Q657632","display_name":"Human motion","level":3,"score":0.28630000352859497},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2750000059604645},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.26809999346733093}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2508.00589","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.00589","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2508.00589","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.00589","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Autonomous":[0],"driving":[1,26,57],"systems":[2,58],"must":[3],"operate":[4],"reliably":[5],"in":[6,25,59,155],"safety-critical":[7],"scenarios,":[8,62],"particularly":[9],"those":[10],"involving":[11],"unusual":[12],"or":[13],"complex":[14],"behavior":[15,40,105],"by":[16,150],"Vulnerable":[17],"Road":[18],"Users":[19],"(VRUs).":[20],"Identifying":[21],"these":[22],"edge":[23],"cases":[24],"datasets":[27,48],"is":[28,49],"essential":[29],"for":[30],"robust":[31],"evaluation":[32,54],"and":[33,80,106,131,141],"generalization,":[34],"but":[35],"retrieving":[36],"such":[37],"rare":[38],"human":[39,104],"scenarios":[41],"within":[42],"the":[43,100,122,161],"long":[44],"tail":[45],"of":[46,55,103,121],"large-scale":[47],"challenging.":[50],"To":[51],"support":[52],"targeted":[53],"autonomous":[56],"diverse,":[60],"human-centered":[61],"we":[63],"propose":[64],"a":[65,88],"novel":[66],"context-aware":[67],"motion":[68,78,130],"retrieval":[69,102],"framework.":[70],"Our":[71,97,145],"method":[72],"combines":[73],"Skinned":[74],"Multi-Person":[75],"Linear":[76],"(SMPL)-based":[77],"sequences":[79,140],"corresponding":[81,142],"video":[82],"frames":[83],"before":[84],"encoding":[85],"them":[86],"into":[87],"shared":[89],"multimodal":[90],"embedding":[91],"space":[92],"aligned":[93],"with":[94],"natural":[95],"language.":[96],"approach":[98,146],"enables":[99],"scalable":[101],"their":[107],"context":[108,133],"through":[109],"text":[110],"queries.":[111],"This":[112],"work":[113],"also":[114],"introduces":[115],"our":[116],"dataset":[117],"WayMoCo,":[118],"an":[119],"extension":[120],"Waymo":[123],"Open":[124],"Dataset.":[125],"It":[126],"contains":[127],"automatically":[128],"labeled":[129],"scene":[132],"descriptions":[134],"derived":[135],"from":[136],"generated":[137],"pseudo-ground-truth":[138],"SMPL":[139],"image":[143],"data.":[144],"outperforms":[147],"state-of-the-art":[148],"models":[149],"up":[151],"to":[152],"27.5%":[153],"accuracy":[154],"motion-context":[156],"retrieval,":[157],"when":[158],"evaluated":[159],"on":[160],"WayMoCo":[162],"dataset.":[163]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
