{"id":"https://openalex.org/W4225285639","doi":"https://doi.org/10.1109/icassp43922.2022.9747094","title":"Low-Latency Human-Computer Auditory Interface Based on Real-Time Vision Analysis","display_name":"Low-Latency Human-Computer Auditory Interface Based on Real-Time Vision Analysis","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225285639","doi":"https://doi.org/10.1109/icassp43922.2022.9747094"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747094","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-03796641","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051399721","display_name":"Florian Scalvini","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Florian Scalvini","raw_affiliation_strings":["ImViA EA 7535 - Univ. Bourgogne Franche-Comt&#x00E9;,Dijon,France"],"affiliations":[{"raw_affiliation_string":"ImViA EA 7535 - Univ. Bourgogne Franche-Comt&#x00E9;,Dijon,France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043445900","display_name":"Camille Bordeau","orcid":"https://orcid.org/0000-0003-3666-7846"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Camille Bordeau","raw_affiliation_strings":["Univ. Bourgogne Franche-Comt&#x00E9;,LEAD CNRS UMR 5022,Dijon,France"],"affiliations":[{"raw_affiliation_string":"Univ. Bourgogne Franche-Comt&#x00E9;,LEAD CNRS UMR 5022,Dijon,France","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019367550","display_name":"Maxime Ambard","orcid":"https://orcid.org/0000-0003-0653-3177"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Maxime Ambard","raw_affiliation_strings":["Univ. Bourgogne Franche-Comt&#x00E9;,LEAD CNRS UMR 5022,Dijon,France"],"affiliations":[{"raw_affiliation_string":"Univ. Bourgogne Franche-Comt&#x00E9;,LEAD CNRS UMR 5022,Dijon,France","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055057335","display_name":"Cyrille Migniot","orcid":"https://orcid.org/0000-0002-1651-4837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cyrille Migniot","raw_affiliation_strings":["ImViA EA 7535 - Univ. Bourgogne Franche-Comt&#x00E9;,Dijon,France"],"affiliations":[{"raw_affiliation_string":"ImViA EA 7535 - Univ. Bourgogne Franche-Comt&#x00E9;,Dijon,France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048042942","display_name":"Julien Dubois","orcid":"https://orcid.org/0000-0002-3029-173X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Julien Dubois","raw_affiliation_strings":["ImViA EA 7535 - Univ. Bourgogne Franche-Comt&#x00E9;,Dijon,France"],"affiliations":[{"raw_affiliation_string":"ImViA EA 7535 - Univ. Bourgogne Franche-Comt&#x00E9;,Dijon,France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5051399721"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6006,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.83129727,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"36","last_page":"40"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12496","display_name":"Color perception and design","score":0.9696999788284302,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7856988906860352},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6330100297927856},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.5390260815620422},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.44905945658683777},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.4391511380672455},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3607316017150879},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3293466567993164},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.32711267471313477},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1284431517124176},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09339609742164612},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.07252958416938782}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7856988906860352},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6330100297927856},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.5390260815620422},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.44905945658683777},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.4391511380672455},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3607316017150879},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3293466567993164},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.32711267471313477},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1284431517124176},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09339609742164612},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.07252958416938782},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747094","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03796641v1","is_oa":true,"landing_page_url":"https://hal.science/hal-03796641","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), May 2022, Singapore, France. pp.36-40, &#x27E8;10.1109/ICASSP43922.2022.9747094&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-03796641v1","is_oa":true,"landing_page_url":"https://hal.science/hal-03796641","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), May 2022, Singapore, France. pp.36-40, &#x27E8;10.1109/ICASSP43922.2022.9747094&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1515109179","https://openalex.org/W1831408421","https://openalex.org/W1861492603","https://openalex.org/W2069430629","https://openalex.org/W2133797163","https://openalex.org/W2217810635","https://openalex.org/W2509876617","https://openalex.org/W2594917419","https://openalex.org/W2741346289","https://openalex.org/W2766459852","https://openalex.org/W3094535122","https://openalex.org/W6630456326","https://openalex.org/W6639102338","https://openalex.org/W6688765168"],"related_works":["https://openalex.org/W2378005410","https://openalex.org/W2170435352","https://openalex.org/W3205411230","https://openalex.org/W4286899009","https://openalex.org/W9168048","https://openalex.org/W4300849822","https://openalex.org/W4376480820","https://openalex.org/W3155891479","https://openalex.org/W3029351463","https://openalex.org/W4308600690"],"abstract_inverted_index":{"This":[0,66],"paper":[1],"proposes":[2],"a":[3,32,54,81,85,96,106,120],"visuo-auditory":[4],"substitution":[5],"method":[6,117],"to":[7,27,59,99],"assist":[8],"visually":[9],"impaired":[10],"people":[11],"in":[12,21,25,38],"scene":[13],"understanding.":[14],"Our":[15],"approach":[16],"focuses":[17],"on":[18,53,105],"person":[19,64,73],"localisation":[20],"the":[22,71],"user\u2019s":[23,42],"vicinity":[24],"order":[26],"ease":[28],"urban":[29],"walking.":[30],"Since":[31],"real-time":[33,97,121],"and":[34,76],"low-latency":[35],"is":[36,51,68,77,92],"required":[37],"this":[39,116],"context":[40],"for":[41,119],"security,":[43],"we":[44],"propose":[45],"an":[46,61,113],"embedded":[47],"system.":[48],"The":[49],"processing":[50,98],"based":[52],"lightweight":[55],"convolutional":[56],"neural":[57],"network":[58],"perform":[60],"efficient":[62],"2D":[63],"localisation.":[65],"measurement":[67],"enhanced":[69],"with":[70,112],"corresponding":[72],"depth":[74],"information,":[75],"then":[78],"transcribed":[79],"into":[80],"stereophonic":[82],"signal":[83],"via":[84],"head-related":[86],"transfer":[87],"function.":[88],"A":[89],"GPU-based":[90],"implementation":[91],"presented":[93],"that":[94,115],"enables":[95],"be":[100],"reached":[101],"at":[102],"23":[103],"frames/s":[104],"640x480":[107],"video":[108],"stream.":[109],"We":[110],"show":[111],"experiment":[114],"allows":[118],"accurate":[122],"audio-based":[123],"localization.":[124]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
