{"id":"https://openalex.org/W4389665789","doi":"https://doi.org/10.1109/iros55552.2023.10342257","title":"AV-PedAware: Self-Supervised Audio-Visual Fusion for Dynamic Pedestrian Awareness","display_name":"AV-PedAware: Self-Supervised Audio-Visual Fusion for Dynamic Pedestrian Awareness","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389665789","doi":"https://doi.org/10.1109/iros55552.2023.10342257"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10342257","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342257","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.06789","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113700322","display_name":"Yizhuo Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yizhuo Yang","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026848789","display_name":"Shenghai Yuan","orcid":"https://orcid.org/0009-0003-1887-6342"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shenghai Yuan","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045222213","display_name":"Muqing Cao","orcid":"https://orcid.org/0000-0002-5867-5049"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Muqing Cao","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005666034","display_name":"Jianfei Yang","orcid":"https://orcid.org/0000-0002-8075-0439"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jianfei Yang","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100365448","display_name":"Lihua Xie","orcid":"https://orcid.org/0000-0002-7137-4136"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Lihua Xie","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Nanyang Technological University,Singapore,639798","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5113700322"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":1.296,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.83036029,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1871","last_page":"1877"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12597","display_name":"Fire Detection and Safety Systems","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7931081056594849},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7218184471130371},{"id":"https://openalex.org/keywords/lidar","display_name":"Lidar","score":0.6675562858581543},{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.6223017573356628},{"id":"https://openalex.org/keywords/pedestrian-detection","display_name":"Pedestrian detection","score":0.5992327928543091},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5650057792663574},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5254444479942322},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4941706955432892},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.49359166622161865},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.4847673773765564},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4244047701358795},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.33056849241256714},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.15673169493675232},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10258466005325317},{"id":"https://openalex.org/keywords/remote-sensing","display_name":"Remote sensing","score":0.08942657709121704}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7931081056594849},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7218184471130371},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.6675562858581543},{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.6223017573356628},{"id":"https://openalex.org/C2780156472","wikidata":"https://www.wikidata.org/wiki/Q2355550","display_name":"Pedestrian detection","level":3,"score":0.5992327928543091},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5650057792663574},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5254444479942322},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4941706955432892},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.49359166622161865},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.4847673773765564},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4244047701358795},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.33056849241256714},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.15673169493675232},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10258466005325317},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.08942657709121704},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iros55552.2023.10342257","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342257","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2411.06789","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.06789","pdf_url":"https://arxiv.org/pdf/2411.06789","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.06789","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.06789","pdf_url":"https://arxiv.org/pdf/2411.06789","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.6399999856948853,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320318804","display_name":"Delta Electronics","ror":"https://ror.org/04s3g5933"},{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320320709","display_name":"National Research Foundation Singapore","ror":"https://ror.org/03cpyc314"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4389665789.pdf","grobid_xml":"https://content.openalex.org/works/W4389665789.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2147673046","https://openalex.org/W2186094539","https://openalex.org/W2194775991","https://openalex.org/W2560023338","https://openalex.org/W2772736377","https://openalex.org/W2772781455","https://openalex.org/W2949708697","https://openalex.org/W2956791496","https://openalex.org/W2989980422","https://openalex.org/W3004090558","https://openalex.org/W3018757597","https://openalex.org/W3049847664","https://openalex.org/W3103391829","https://openalex.org/W3105684258","https://openalex.org/W3110514869","https://openalex.org/W3130316726","https://openalex.org/W3138095408","https://openalex.org/W3162475350","https://openalex.org/W3188558905","https://openalex.org/W3202854416","https://openalex.org/W3213716738","https://openalex.org/W4200379384","https://openalex.org/W4200633562","https://openalex.org/W4214604251","https://openalex.org/W4312356258","https://openalex.org/W4312622631","https://openalex.org/W6777046832"],"related_works":["https://openalex.org/W4319317934","https://openalex.org/W2901265155","https://openalex.org/W2956374172","https://openalex.org/W4319837668","https://openalex.org/W4308071650","https://openalex.org/W3188333020","https://openalex.org/W4281783339","https://openalex.org/W1964041166","https://openalex.org/W2972620127","https://openalex.org/W2981141433"],"abstract_inverted_index":{"In":[0],"this":[1],"study,":[2],"we":[3,160],"introduce":[4],"AV-PedAware,":[5],"a":[6,23,114,129,162],"self-supervised":[7,106],"audio-visual":[8,83],"fusion":[9,84],"system":[10,102],"designed":[11],"to":[12,39,48,81,85,117,125,176,207],"improve":[13],"dynamic":[14,142],"pedestrian":[15,67,119,165],"awareness":[16,21],"for":[17,65,89,204],"robotics":[18,28,215],"applications.":[19,29],"Pedestrian":[20],"is":[22,103],"critical":[24],"requirement":[25],"in":[26,53,98,189,211],"many":[27],"However,":[30],"traditional":[31,150],"approaches":[32],"that":[33,171],"rely":[34],"on":[35,109],"cameras":[36],"and":[37,46,56,72,144,152,168,185,199],"LIDARs":[38],"cover":[40],"multiple":[41],"views":[42],"can":[43,140],"be":[44],"expensive":[45],"susceptible":[47],"issues":[49],"such":[50],"as":[51],"changes":[52],"illumination,":[54],"occlusion,":[55],"weather":[57],"conditions.":[58,192],"Our":[59],"proposed":[60],"solution":[61],"replicates":[62],"human":[63],"perception":[64,216],"3D":[66,179],"detection":[68,166,180],"using":[69,182],"low-cost":[70],"audio":[71,184],"visual":[73,186,191],"fusion.":[74],"This":[75],"study":[76],"represents":[77],"the":[78,90,94,99,132,147,173,205,212],"first":[79],"attempt":[80],"employ":[82],"monitor":[86],"footstep":[87],"sounds":[88],"purpose":[91],"of":[92,96,131,149,214],"predicting":[93],"movements":[95],"pedestrians":[97],"vicinity.":[100],"The":[101],"trained":[104],"through":[105],"learning":[107],"based":[108],"LIDAR-generated":[110],"labels,":[111],"making":[112],"it":[113,139],"cost-effective":[115],"alternative":[116],"LIDAR-based":[118,126],"awareness.":[120],"AV-PedAware":[121],"achieves":[122],"comparable":[123],"results":[124,181],"systems":[127],"at":[128],"fraction":[130],"cost.":[133],"By":[134],"utilizing":[135],"an":[136],"attention":[137],"mechanism,":[138],"handle":[141],"lighting":[143],"occlusions,":[145],"overcoming":[146],"limitations":[148],"LIDAR":[151],"camera-based":[153],"systems.":[154,217],"To":[155],"evaluate":[156],"our":[157,196],"approach's":[158],"effectiveness,":[159],"collected":[161,197],"new":[163],"multimodal":[164],"dataset":[167,198],"conducted":[169],"experiments":[170],"demonstrate":[172],"system's":[174],"ability":[175],"provide":[177],"reliable":[178],"only":[183],"data,":[187],"even":[188],"extreme":[190],"We":[193],"will":[194],"make":[195],"source":[200],"code":[201],"available":[202],"online":[203],"community":[206],"encourage":[208],"further":[209],"development":[210],"field":[213]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
