{"id":"https://openalex.org/W4391841607","doi":"https://doi.org/10.1007/s00354-024-00243-8","title":"Neuro-Symbolic Reasoning for Multimodal Referring Expression Comprehension in HMI Systems","display_name":"Neuro-Symbolic Reasoning for Multimodal Referring Expression Comprehension in HMI Systems","publication_year":2024,"publication_date":"2024-02-15","ids":{"openalex":"https://openalex.org/W4391841607","doi":"https://doi.org/10.1007/s00354-024-00243-8"},"language":"en","primary_location":{"id":"doi:10.1007/s00354-024-00243-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00354-024-00243-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00354-024-00243-8.pdf","source":{"id":"https://openalex.org/S165364243","display_name":"New Generation Computing","issn_l":"0288-3635","issn":["0288-3635","1882-7055"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"New Generation Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00354-024-00243-8.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Aman Jain","orcid":"https://orcid.org/0009-0004-9495-0396"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]},{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Aman Jain","raw_affiliation_strings":["Honda R &D Co. Ltd., Tokyo, Japan","The University of Tokyo, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0009-0004-9495-0396","affiliations":[{"raw_affiliation_string":"Honda R &D Co. Ltd., Tokyo, Japan","institution_ids":["https://openalex.org/I1283473643"]},{"raw_affiliation_string":"The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084547388","display_name":"Anirudh Reddy Kondapally","orcid":null},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]},{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Anirudh Reddy Kondapally","raw_affiliation_strings":["Honda R &D Co. Ltd., Tokyo, Japan","The University of Tokyo, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Honda R &D Co. Ltd., Tokyo, Japan","institution_ids":["https://openalex.org/I1283473643"]},{"raw_affiliation_string":"The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100616622","display_name":"Kentaro Yamada","orcid":"https://orcid.org/0009-0003-8202-7417"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kentaro Yamada","raw_affiliation_strings":["Honda R &D Co. Ltd., Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Honda R &D Co. Ltd., Tokyo, Japan","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045824013","display_name":"Hitomi Yanaka","orcid":"https://orcid.org/0000-0003-0354-6116"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hitomi Yanaka","raw_affiliation_strings":["The University of Tokyo, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1283473643","https://openalex.org/I74801974"],"apc_list":{"value":2490,"currency":"EUR","value_usd":3090},"apc_paid":{"value":2490,"currency":"EUR","value_usd":3090},"fwci":0.4488,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.58167608,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"42","issue":"4","first_page":"579","last_page":"598"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9214246869087219},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.6737428307533264},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.5929522514343262},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40882039070129395},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39445948600769043},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3934631049633026},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.34792280197143555}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9214246869087219},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.6737428307533264},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.5929522514343262},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40882039070129395},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39445948600769043},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3934631049633026},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.34792280197143555}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s00354-024-00243-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00354-024-00243-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00354-024-00243-8.pdf","source":{"id":"https://openalex.org/S165364243","display_name":"New Generation Computing","issn_l":"0288-3635","issn":["0288-3635","1882-7055"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"New Generation Computing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s00354-024-00243-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00354-024-00243-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00354-024-00243-8.pdf","source":{"id":"https://openalex.org/S165364243","display_name":"New Generation Computing","issn_l":"0288-3635","issn":["0288-3635","1882-7055"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"New Generation Computing","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6499999761581421}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322832","display_name":"University of Tokyo","ror":"https://ror.org/057zh3y96"},{"id":"https://openalex.org/F4320338111","display_name":"Precursory Research for Embryonic Science and Technology","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4391841607.pdf"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W1505952289","https://openalex.org/W1933349210","https://openalex.org/W2014268383","https://openalex.org/W2036439337","https://openalex.org/W2048343491","https://openalex.org/W2077069816","https://openalex.org/W2103362776","https://openalex.org/W2111305343","https://openalex.org/W2117539524","https://openalex.org/W2118020555","https://openalex.org/W2123442489","https://openalex.org/W2135643110","https://openalex.org/W2151308976","https://openalex.org/W2157725521","https://openalex.org/W2161319957","https://openalex.org/W2166200269","https://openalex.org/W2277195237","https://openalex.org/W2471094925","https://openalex.org/W2489434015","https://openalex.org/W2493916176","https://openalex.org/W2552788314","https://openalex.org/W2561715562","https://openalex.org/W2903343986","https://openalex.org/W2907143950","https://openalex.org/W2915977242","https://openalex.org/W2921106744","https://openalex.org/W2962716332","https://openalex.org/W2962884579","https://openalex.org/W2963260436","https://openalex.org/W3090449556","https://openalex.org/W3093588714","https://openalex.org/W3188422869","https://openalex.org/W3197501724","https://openalex.org/W3201770677","https://openalex.org/W4285295252","https://openalex.org/W4376226279","https://openalex.org/W4379929801","https://openalex.org/W4386065815","https://openalex.org/W6600740517"],"related_works":["https://openalex.org/W2616627668","https://openalex.org/W3137121595","https://openalex.org/W2392243736","https://openalex.org/W86652014","https://openalex.org/W2051345519","https://openalex.org/W1991530724","https://openalex.org/W2023344535","https://openalex.org/W2383982204","https://openalex.org/W2373862202","https://openalex.org/W2388232445"],"abstract_inverted_index":{"Abstract":[0],"Conventional":[1],"Human\u2013Machine":[2],"Interaction":[3],"(HMI)":[4],"interfaces":[5],"have":[6,34],"predominantly":[7],"relied":[8],"on":[9,151],"GUI":[10],"and":[11,48,76,93,144,155,192],"voice":[12,52],"commands.":[13,53],"However,":[14,54],"natural":[15],"human":[16],"communication":[17],"also":[18],"consists":[19],"of":[20,74,112,195,211],"non-verbal":[21],"communication,":[22],"including":[23],"hand":[24],"gestures":[25,39,102],"like":[26,103],"pointing.":[27,104],"Thus,":[28],"recent":[29],"works":[30],"in":[31,46,90,134],"HMI":[32,124,186],"systems":[33,125],"tried":[35],"to":[36,66,159,198,202],"incorporate":[37],"pointing":[38,127],"as":[40],"an":[41,139,188,193,207],"input,":[42],"making":[43],"significant":[44],"progress":[45],"recognizing":[47],"integrating":[49],"them":[50],"with":[51,141,147,173],"existing":[55],"approaches":[56],"often":[57],"treat":[58],"these":[59,97],"input":[60],"modalities":[61],"independently,":[62],"limiting":[63],"their":[64],"capacity":[65],"handle":[67],"complex":[68,85],"multimodal":[69,82,115,123,182],"instructions":[70],"requiring":[71,84],"intricate":[72],"reasoning":[73,86,201],"language":[75,92,154],"gestures.":[77,128],"On":[78],"the":[79,91,113,152,200,212],"other":[80],"hand,":[81],"tasks":[83],"are":[87],"being":[88],"challenged":[89],"vision":[94],"domain,":[95],"but":[96],"typically":[98],"do":[99],"not":[100],"include":[101,179],"To":[105],"bridge":[106],"this":[107,161],"gap,":[108],"we":[109,163],"explore":[110],"one":[111],"challenging":[114,181],"tasks,":[116],"called":[117],"Referring":[118],"Expression":[119],"Comprehension":[120],"(REC),":[121],"within":[122],"incorporating":[126],"We":[129],"present":[130],"a":[131,136,142,165,180],"virtual":[132],"setup":[133],"which":[135],"robot":[137],"shares":[138],"environment":[140],"user":[143],"is":[145],"tasked":[146],"identifying":[148],"objects":[149],"based":[150],"user\u2019s":[153],"gestural":[156],"instructions.":[157],"Furthermore,":[158],"address":[160],"challenge,":[162],"propose":[164],"hybrid":[166],"neuro-symbolic":[167,190],"model":[168],"combining":[169],"deep":[170],"learning\u2019s":[171],"versatility":[172],"symbolic":[174],"reasoning\u2019s":[175],"interpretability.":[176],"Our":[177],"contributions":[178],"REC":[183],"dataset":[184],"for":[185],"systems,":[187],"interpretable":[189],"model,":[191],"assessment":[194],"its":[196],"ability":[197],"generalize":[199],"unseen":[203],"environments,":[204],"complemented":[205],"by":[206],"in-depth":[208],"qualitative":[209],"analysis":[210],"model\u2019s":[213],"inner":[214],"workings.":[215]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
