{"id":"https://openalex.org/W4417266693","doi":"https://doi.org/10.1109/iccv51701.2025.02526","title":"Egocentric Action-Aware Inertial Localization in Point Clouds with Vision-Language Guidance","display_name":"Egocentric Action-Aware Inertial Localization in Point Clouds with Vision-Language Guidance","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4417266693","doi":"https://doi.org/10.1109/iccv51701.2025.02526"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02526","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02526","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.14346","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100685355","display_name":"Mingfang Zhang","orcid":"https://orcid.org/0000-0003-1792-6654"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Mingfang Zhang","raw_affiliation_strings":["The University of Tokyo"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020282208","display_name":"Ryo Yonetani","orcid":"https://orcid.org/0000-0002-2724-6233"},"institutions":[{"id":"https://openalex.org/I4210089607","display_name":"CyberAgent (Japan)","ror":"https://ror.org/0060jg679","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210089607"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryo Yonetani","raw_affiliation_strings":["CyberAgent AI Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CyberAgent AI Lab","institution_ids":["https://openalex.org/I4210089607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100642051","display_name":"Yifei Huang","orcid":"https://orcid.org/0000-0002-3077-0175"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yifei Huang","raw_affiliation_strings":["The University of Tokyo"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113287550","display_name":"Liangyang Ouyang","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Liangyang Ouyang","raw_affiliation_strings":["The University of Tokyo"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068038722","display_name":"Ruicong Liu","orcid":"https://orcid.org/0000-0002-8460-8763"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ruicong Liu","raw_affiliation_strings":["The University of Tokyo"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089491968","display_name":"Yoichi Sato","orcid":"https://orcid.org/0000-0002-7456-0989"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoichi Sato","raw_affiliation_strings":["The University of Tokyo"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100685355"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37861162,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"27209","last_page":"27219"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.913100004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.913100004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.014999999664723873,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.010099999606609344,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inertial-measurement-unit","display_name":"Inertial measurement unit","score":0.9021999835968018},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.5982000231742859},{"id":"https://openalex.org/keywords/inertial-frame-of-reference","display_name":"Inertial frame of reference","score":0.5694000124931335},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.43650001287460327},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4350999891757965},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4041000008583069},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.39169999957084656},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.38760000467300415}],"concepts":[{"id":"https://openalex.org/C79061980","wikidata":"https://www.wikidata.org/wiki/Q941680","display_name":"Inertial measurement unit","level":2,"score":0.9021999835968018},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7556999921798706},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7017999887466431},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6158000230789185},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.5982000231742859},{"id":"https://openalex.org/C173386949","wikidata":"https://www.wikidata.org/wiki/Q192735","display_name":"Inertial frame of reference","level":2,"score":0.5694000124931335},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.43650001287460327},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4350999891757965},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4041000008583069},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.39169999957084656},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.38760000467300415},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.37209999561309814},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.36070001125335693},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C86369673","wikidata":"https://www.wikidata.org/wiki/Q1203659","display_name":"Simultaneous localization and mapping","level":4,"score":0.3431999981403351},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C128651787","wikidata":"https://www.wikidata.org/wiki/Q570607","display_name":"Inertial navigation system","level":3,"score":0.27059999108314514},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C2776937971","wikidata":"https://www.wikidata.org/wiki/Q4384217","display_name":"Heading (navigation)","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02526","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02526","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2505.14346","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.14346","pdf_url":"https://arxiv.org/pdf/2505.14346","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2505.14346","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.14346","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.14346","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.14346","pdf_url":"https://arxiv.org/pdf/2505.14346","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4417266693.pdf","grobid_xml":"https://content.openalex.org/works/W4417266693.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,29,92,207],"novel":[4],"inertial":[5,34,190,220,223],"localization":[6,35,103,221],"framework":[7,108,217],"named":[8],"Egocentric":[9],"Action-aware":[10],"Inertial":[11],"Localization":[12],"(EAIL),":[13],"which":[14],"leverages":[15],"egocentric":[16,139],"action":[17,140,224],"cues":[18,141],"from":[19],"head-mounted":[20,73],"IMU":[21,40,56,74,143,178],"signals":[22,144,164],"to":[23,39,83,91,99,165,188,199],"localize":[24],"the":[25,72,102,122,127,150,177,181,201,212,215],"target":[26],"individual":[27],"within":[28],"3D":[30,123],"point":[31,124,151,182],"cloud.":[32,152],"Human":[33],"is":[36,129,156],"challenging":[37],"due":[38],"sensor":[41],"noise":[42],"that":[43,67,121,136],"causes":[44],"trajectory":[45],"drift":[46],"over":[47,184,218],"time.":[48],"The":[49,105,153,169],"diversity":[50],"of":[51,126,204,214],"human":[52],"actions":[53,69,205],"further":[54,196],"complicates":[55],"signal":[57],"processing":[58],"by":[59,71],"introducing":[60],"various":[61],"motion":[62],"patterns.":[63],"Nevertheless,":[64],"we":[65],"observe":[66],"some":[68],"captured":[70],"correlate":[75],"with":[76,116,145],"spatial":[77,97],"environmental":[78,147],"structures":[79],"(e.g.,":[80],"bending":[81],"down":[82],"look":[84],"inside":[85],"an":[86],"oven,":[87],"washing":[88],"dishes":[89],"next":[90],"sink),":[93],"thereby":[94],"serving":[95],"as":[96,206],"anchors":[98],"compensate":[100],"for":[101],"drift.":[104],"proposed":[106,216],"EAIL":[107],"learns":[109,133],"such":[110],"correlations":[111],"via":[112],"hierarchical":[113],"multi-modal":[114],"alignment":[115],"vision-language":[117],"guidance.":[118],"By":[119],"assuming":[120],"cloud":[125,183],"environment":[128],"available,":[130],"it":[131],"contrastively":[132],"modality":[134],"encoders":[135,171,194],"align":[137],"short-term":[138],"in":[142,149,175],"local":[146],"features":[148],"learning":[154],"process":[155],"enhanced":[157],"using":[158],"concurrently":[159],"collected":[160],"vision":[161],"and":[162,180,186,222],"language":[163],"improve":[166],"multimodal":[167],"alignment.":[168],"learned":[170],"are":[172],"then":[173],"used":[174],"reasoning":[176],"data":[179],"time":[185],"space":[187],"perform":[189],"localization.":[191],"Interestingly,":[192],"these":[193],"can":[195],"be":[197],"utilized":[198],"recognize":[200],"corresponding":[202],"sequence":[203],"by-product.":[208],"Extensive":[209],"experiments":[210],"demonstrate":[211],"effectiveness":[213],"state-of-the-art":[219],"recognition":[225],"baselines.":[226]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
