{"id":"https://openalex.org/W3200763177","doi":"https://doi.org/10.1109/icra46639.2022.9811895","title":"Audio-Visual Grounding Referring Expression for Robotic Manipulation","display_name":"Audio-Visual Grounding Referring Expression for Robotic Manipulation","publication_year":2022,"publication_date":"2022-05-23","ids":{"openalex":"https://openalex.org/W3200763177","doi":"https://doi.org/10.1109/icra46639.2022.9811895","mag":"3200763177"},"language":"en","primary_location":{"id":"doi:10.1109/icra46639.2022.9811895","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9811895","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101893513","display_name":"Yefei Wang","orcid":"https://orcid.org/0000-0002-7228-7712"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yefei Wang","raw_affiliation_strings":["Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084","School of Electronic and Information Engineering, Nanjing University of Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Electronic and Information Engineering, Nanjing University of Information Science and Technology","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100757020","display_name":"Kaili Wang","orcid":"https://orcid.org/0000-0001-6808-2530"},"institutions":[{"id":"https://openalex.org/I18452120","display_name":"Yantai University","ror":"https://ror.org/01rp41m56","country_code":"CN","type":"education","lineage":["https://openalex.org/I18452120"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaili Wang","raw_affiliation_strings":["School of Physics and Electronic Information, Yantai University"],"affiliations":[{"raw_affiliation_string":"School of Physics and Electronic Information, Yantai University","institution_ids":["https://openalex.org/I18452120"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100748818","display_name":"Wang Yi","orcid":"https://orcid.org/0000-0002-2994-6110"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Wang","raw_affiliation_strings":["Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084","School of Electronic and Information Engineering, Nanjing University of Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Electronic and Information Engineering, Nanjing University of Information Science and Technology","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101833400","display_name":"Di Guo","orcid":"https://orcid.org/0000-0002-9816-0103"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Guo","raw_affiliation_strings":["Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041101317","display_name":"Huaping Liu","orcid":"https://orcid.org/0000-0002-4042-6044"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaping Liu","raw_affiliation_strings":["Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055546056","display_name":"Fuchun Sun","orcid":"https://orcid.org/0000-0003-3546-6305"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuchun Sun","raw_affiliation_strings":["Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence, Tsinghua University,Beijing National Research Center for Information Science and Technology,Department of Computer Science and Technology,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101893513"],"corresponding_institution_ids":["https://openalex.org/I200845125","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.0195,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.83758734,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"9258","last_page":"9264"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.8130943775177002},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8124247789382935},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6724720001220703},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5755859017372131},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.5730436444282532},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5371392965316772},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4696449637413025},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4371175467967987},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4157285988330841},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4143359363079071},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.1648399531841278},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07808703184127808}],"concepts":[{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.8130943775177002},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8124247789382935},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6724720001220703},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5755859017372131},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.5730436444282532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5371392965316772},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4696449637413025},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4371175467967987},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4157285988330841},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4143359363079071},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.1648399531841278},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07808703184127808},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra46639.2022.9811895","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9811895","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2802338391","display_name":null,"funder_award_id":"2018YFB1305102","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2229480318","https://openalex.org/W2251512949","https://openalex.org/W2489434015","https://openalex.org/W2558535589","https://openalex.org/W2740890919","https://openalex.org/W2902699874","https://openalex.org/W2910172038","https://openalex.org/W2910881099","https://openalex.org/W2963922343","https://openalex.org/W2964345792","https://openalex.org/W2966917182","https://openalex.org/W2968344028","https://openalex.org/W2969111450","https://openalex.org/W2989791676","https://openalex.org/W2998012869","https://openalex.org/W3034578524","https://openalex.org/W3035097537","https://openalex.org/W3089887959","https://openalex.org/W3108332675","https://openalex.org/W3110918234","https://openalex.org/W3112077297","https://openalex.org/W3159312262","https://openalex.org/W3169884222","https://openalex.org/W3174510143","https://openalex.org/W3176232375","https://openalex.org/W3194215047","https://openalex.org/W6741999068","https://openalex.org/W6780712802","https://openalex.org/W6786504791"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"Referring":[0],"expressions":[1],"are":[2,54,92],"commonly":[3],"used":[4],"when":[5],"referring":[6,26,43],"to":[7,40,98],"a":[8,20,76],"specific":[9],"target":[10],"in":[11,45],"people's":[12],"daily":[13],"dialogue.":[14],"In":[15],"this":[16],"paper,":[17],"we":[18],"develop":[19],"novel":[21],"task":[22],"of":[23,102],"audio-visual":[24,62,105,118],"grounding":[25],"expression":[27,44],"for":[28,66,87],"robotic":[29],"manipulation.":[30],"The":[31],"robot":[32,113],"leverages":[33],"both":[34,94],"the":[35,42,46,51,58,100,103,112,117,123],"audio":[36],"and":[37,50,69,84,96],"visual":[38,67,80,124],"information":[39],"understand":[41],"given":[47],"manipulation":[48,85],"instruction":[49],"corresponding":[52],"manipulations":[53],"implemented.":[55],"To":[56],"solve":[57],"proposed":[59,65,104],"task,":[60],"an":[61],"framework":[63],"is":[64,109],"localization":[68],"sound":[70],"recognition.":[71],"We":[72],"have":[73],"also":[74],"established":[75],"dataset":[77],"which":[78],"contains":[79],"data,":[81],"auditory":[82],"data":[83,119],"instructions":[86],"evaluation.":[88],"Finally,":[89],"extensive":[90],"experiments":[91],"conducted":[93],"offline":[95],"online":[97],"verify":[99],"effectiveness":[101],"framework.":[106],"And":[107],"it":[108],"demonstrated":[110],"that":[111],"performs":[114],"better":[115],"with":[116,121],"than":[120],"only":[122],"data.":[125]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
