{"id":"https://openalex.org/W3196347085","doi":"https://doi.org/10.1109/tpami.2022.3155643","title":"Binaural SoundNet: Predicting Semantics, Depth and Motion With Binaural Sounds","display_name":"Binaural SoundNet: Predicting Semantics, Depth and Motion With Binaural Sounds","publication_year":2022,"publication_date":"2022-03-03","ids":{"openalex":"https://openalex.org/W3196347085","doi":"https://doi.org/10.1109/tpami.2022.3155643","mag":"3196347085","pmid":"https://pubmed.ncbi.nlm.nih.gov/35239475"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2022.3155643","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3155643","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078838951","display_name":"Dengxin Dai","orcid":"https://orcid.org/0000-0001-5440-9678"},"institutions":[{"id":"https://openalex.org/I4210109712","display_name":"Max Planck Institute for Informatics","ror":"https://ror.org/01w19ak89","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210109712"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Dengxin Dai","raw_affiliation_strings":["Vision for Autonomous Systems Group, MPI for Informatics, Saarbr&#x00FC;cken, Germany"],"affiliations":[{"raw_affiliation_string":"Vision for Autonomous Systems Group, MPI for Informatics, Saarbr&#x00FC;cken, Germany","institution_ids":["https://openalex.org/I4210109712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027109026","display_name":"Arun Balajee Vasudevan","orcid":"https://orcid.org/0000-0002-5409-0780"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arun Balajee Vasudevan","raw_affiliation_strings":["Computer Vision Lab, ETH Z&#x00FC;rich, Zrich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, ETH Z&#x00FC;rich, Zrich, Switzerland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007656938","display_name":"Ji\u0159\u0131\u0301 Matas","orcid":"https://orcid.org/0000-0003-0863-4844"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jiri Matas","raw_affiliation_strings":["Center for Machine Perception, Czech Technical University, Prague, Czechia"],"affiliations":[{"raw_affiliation_string":"Center for Machine Perception, Czech Technical University, Prague, Czechia","institution_ids":["https://openalex.org/I44504214"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001254143","display_name":"Luc Van Gool","orcid":"https://orcid.org/0000-0002-3445-5711"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Luc Van Gool","raw_affiliation_strings":["Computer Vision Lab, ETH Z&#x00FC;rich, Zrich, Switzerland","Department of Electrical Engineering, KU Leuven, Leuven, Belgium"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, ETH Z&#x00FC;rich, Zrich, Switzerland","institution_ids":[]},{"raw_affiliation_string":"Department of Electrical Engineering, KU Leuven, Leuven, Belgium","institution_ids":["https://openalex.org/I99464096"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5078838951"],"corresponding_institution_ids":["https://openalex.org/I4210109712"],"apc_list":null,"apc_paid":null,"fwci":1.1899,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.76302039,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"45","issue":"1","first_page":"123","last_page":"136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/binaural-recording","display_name":"Binaural recording","score":0.8758665323257446},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8328785300254822},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5714050531387329},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5500820279121399},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5124431252479553},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4958737790584564},{"id":"https://openalex.org/keywords/sound-localization","display_name":"Sound localization","score":0.4714523255825043},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46655648946762085},{"id":"https://openalex.org/keywords/auditory-scene-analysis","display_name":"Auditory scene analysis","score":0.4520249366760254},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.42578524351119995},{"id":"https://openalex.org/keywords/orientation","display_name":"Orientation (vector space)","score":0.41889840364456177},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.2600449025630951},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1329679787158966}],"concepts":[{"id":"https://openalex.org/C201247586","wikidata":"https://www.wikidata.org/wiki/Q5612967","display_name":"Binaural recording","level":2,"score":0.8758665323257446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8328785300254822},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5714050531387329},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5500820279121399},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5124431252479553},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4958737790584564},{"id":"https://openalex.org/C68236139","wikidata":"https://www.wikidata.org/wiki/Q765652","display_name":"Sound localization","level":2,"score":0.4714523255825043},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46655648946762085},{"id":"https://openalex.org/C38129911","wikidata":"https://www.wikidata.org/wiki/Q4820038","display_name":"Auditory scene analysis","level":3,"score":0.4520249366760254},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.42578524351119995},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.41889840364456177},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2600449025630951},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1329679787158966},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003463","descriptor_name":"Cues","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003463","descriptor_name":"Cues","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003463","descriptor_name":"Cues","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D013016","descriptor_name":"Sound","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013016","descriptor_name":"Sound","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013016","descriptor_name":"Sound","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2022.3155643","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3155643","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:35239475","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35239475","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":105,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1523224409","https://openalex.org/W1581848821","https://openalex.org/W1937507046","https://openalex.org/W1971920230","https://openalex.org/W2024787486","https://openalex.org/W2026401213","https://openalex.org/W2028184103","https://openalex.org/W2031489346","https://openalex.org/W2032337854","https://openalex.org/W2038484192","https://openalex.org/W2048237443","https://openalex.org/W2049193094","https://openalex.org/W2067503923","https://openalex.org/W2069057506","https://openalex.org/W2072604753","https://openalex.org/W2110764733","https://openalex.org/W2115579991","https://openalex.org/W2116101591","https://openalex.org/W2120847449","https://openalex.org/W2122497675","https://openalex.org/W2159346755","https://openalex.org/W2171819471","https://openalex.org/W2171834532","https://openalex.org/W2253728777","https://openalex.org/W2340897893","https://openalex.org/W2343077198","https://openalex.org/W2412782625","https://openalex.org/W2511428026","https://openalex.org/W2560474170","https://openalex.org/W2584505851","https://openalex.org/W2619697695","https://openalex.org/W2737712196","https://openalex.org/W2742299865","https://openalex.org/W2764198839","https://openalex.org/W2767709712","https://openalex.org/W2800288142","https://openalex.org/W2886300652","https://openalex.org/W2891715141","https://openalex.org/W2897451716","https://openalex.org/W2939726645","https://openalex.org/W2962703836","https://openalex.org/W2962723698","https://openalex.org/W2962756039","https://openalex.org/W2962865004","https://openalex.org/W2962866891","https://openalex.org/W2962960500","https://openalex.org/W2963061226","https://openalex.org/W2963115079","https://openalex.org/W2963218389","https://openalex.org/W2963680395","https://openalex.org/W2964109005","https://openalex.org/W2964121744","https://openalex.org/W2964309882","https://openalex.org/W2964345931","https://openalex.org/W2970603850","https://openalex.org/W2970746371","https://openalex.org/W2981851635","https://openalex.org/W2982624843","https://openalex.org/W2985775862","https://openalex.org/W2988200020","https://openalex.org/W2989980422","https://openalex.org/W2990408345","https://openalex.org/W2995233853","https://openalex.org/W2995254904","https://openalex.org/W3000389243","https://openalex.org/W3034742263","https://openalex.org/W3085046840","https://openalex.org/W3089887959","https://openalex.org/W3089944088","https://openalex.org/W3092237241","https://openalex.org/W3096411729","https://openalex.org/W3096780661","https://openalex.org/W3101537861","https://openalex.org/W3103348781","https://openalex.org/W3104529101","https://openalex.org/W3106362698","https://openalex.org/W3108332675","https://openalex.org/W3108655859","https://openalex.org/W3121780787","https://openalex.org/W3122808968","https://openalex.org/W3130951396","https://openalex.org/W3138953166","https://openalex.org/W3162322471","https://openalex.org/W3174854700","https://openalex.org/W3174856432","https://openalex.org/W3176232375","https://openalex.org/W3188558905","https://openalex.org/W3190580390","https://openalex.org/W3207649350","https://openalex.org/W4287608901","https://openalex.org/W4293665662","https://openalex.org/W6631190155","https://openalex.org/W6678516059","https://openalex.org/W6729831399","https://openalex.org/W6738607494","https://openalex.org/W6748481559","https://openalex.org/W6755528852","https://openalex.org/W6767784004","https://openalex.org/W6770805772","https://openalex.org/W6771599870","https://openalex.org/W6771763809","https://openalex.org/W6784119104","https://openalex.org/W6785011006","https://openalex.org/W6792340124"],"related_works":["https://openalex.org/W1991848873","https://openalex.org/W3004570917","https://openalex.org/W4389240440","https://openalex.org/W2084430325","https://openalex.org/W2041661331","https://openalex.org/W2539207221","https://openalex.org/W2045803470","https://openalex.org/W2242743481","https://openalex.org/W1571953124","https://openalex.org/W4389102442"],"abstract_inverted_index":{"Humans":[0],"can":[1,148],"robustly":[2],"recognize":[3],"and":[4,61,77,91,99,122,227,234,242,259],"localize":[5],"objects":[6],"by":[7,245],"using":[8,152],"visual":[9,22,98],"and/or":[10],"auditory":[11,146,254],"cues.":[12],"While":[13],"machines":[14],"are":[15,213,231,251,261],"able":[16],"to":[17,133,170,190],"do":[18],"the":[19,50,56,62,66,128,135,139,145,158,172,180,192,210,221,225,239,246,264],"same":[20,136],"with":[21,30,86],"data":[23,258],"already,":[24],"less":[25],"work":[26,33],"has":[27],"been":[28],"done":[29],"sounds.":[31,44,176],"This":[32,143],"develops":[34],"an":[35],"approach":[36],"for":[37,104,205,253],"scene":[38],"understanding":[39],"purely":[40],"based":[41],"on":[42,263],"binaural":[43,89],"The":[45,95,257],"considered":[46],"tasks":[47,182,212],"include":[48],"predicting":[49],"semantic":[51],"masks":[52],"of":[53,58,65,83,97,117,175,229],"sound-making":[54,59],"objects,":[55,60],"motion":[57],"depth":[63],"map":[64],"scene.":[67],"To":[68,155],"this":[69],"aim,":[70],"we":[71,109,160],"propose":[72,161],"a":[73,79,92,111,123],"novel":[74,163],"sensor":[75],"setup":[76],"record":[78],"new":[80],"audio-visual":[81],"dataset":[82],"street":[84],"scenes":[85],"eight":[87],"professional":[88],"microphones":[90,230],"360":[93],"<sup>\u00b0</sup>camera.":[94],"co-existence":[96],"audio":[100],"cues":[101],"is":[102,131],"leveraged":[103],"supervision":[105],"transfer.":[106],"In":[107],"particular,":[108],"employ":[110],"cross-modal":[112],"distillation":[113],"framework":[114],"that":[115,198],"consists":[116],"multiple":[118],"vision":[119],"'teacher'":[120],"methods":[121,141],"sound":[124],"'student'":[125],"method":[126,130,201],"-":[127,216],"student":[129],"trained":[132,150],"generate":[134],"results":[137,196,204],"as":[138],"teacher":[140],"do.":[142],"way,":[144],"system":[147],"be":[149],"without":[151],"human":[153],"annotations.":[154],"further":[156],"boost":[157,191],"performance,":[159,223],"another":[162],"auxiliary":[164],"task,":[165],"coined":[166],"Spatial":[167],"Sound":[168],"Super-Resolution,":[169],"increase":[171],"directional":[173],"resolution":[174],"We":[177],"then":[178],"formulate":[179],"four":[181,207,211],"into":[183],"one":[184],"end-to-end":[185],"trainable":[186],"multi-tasking":[187],"network":[188],"aiming":[189],"overall":[193],"performance.":[194],"Experimental":[195],"show":[197],"1)":[199],"our":[200],"achieves":[202,220],"good":[203],"all":[206],"tasks,":[208],"2)":[209],"mutually":[214],"beneficial":[215],"training":[217],"them":[218],"together":[219],"best":[222],"3)":[224],"number":[226],"orientation":[228],"both":[232],"important,":[233],"4)":[235],"features":[236,243],"learned":[237],"from":[238],"standard":[240],"spectrogram":[241],"obtained":[244],"classic":[247],"signal":[248],"processing":[249],"pipeline":[250],"complementary":[252],"perception":[255],"tasks.":[256],"code":[260],"released":[262],"project":[265],"page:":[266],"https://www.trace.ethz.ch/publications/2020/sound_perception/index.html.":[267]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
