{"id":"https://openalex.org/W4312781034","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892420","title":"Semantic VL-BERT: Visual Grounding via Attribute Learning","display_name":"Semantic VL-BERT: Visual Grounding via Attribute Learning","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4312781034","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892420"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892420","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892420","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081500689","display_name":"Prashan Wanigasekara","orcid":"https://orcid.org/0000-0002-9453-9933"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Prashan Wanigasekara","raw_affiliation_strings":["Alexa AI-NU,Cambridge,USA","Alexa AI-NU, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Alexa AI-NU,Cambridge,USA","institution_ids":[]},{"raw_affiliation_string":"Alexa AI-NU, Cambridge, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011353616","display_name":"Kechen Qin","orcid":"https://orcid.org/0000-0003-3169-575X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kechen Qin","raw_affiliation_strings":["Alexa AI-NU,Cambridge,USA","Alexa AI-NU, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Alexa AI-NU,Cambridge,USA","institution_ids":[]},{"raw_affiliation_string":"Alexa AI-NU, Cambridge, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017523558","display_name":"Emre Barut","orcid":"https://orcid.org/0000-0003-3064-6227"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emre Barut","raw_affiliation_strings":["Alexa AI-NU,Cambridge,USA","Alexa AI-NU, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Alexa AI-NU,Cambridge,USA","institution_ids":[]},{"raw_affiliation_string":"Alexa AI-NU, Cambridge, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100614031","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0002-4856-1929"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Alexa AI-NU,Cambridge,USA","Alexa AI-NU, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Alexa AI-NU,Cambridge,USA","institution_ids":[]},{"raw_affiliation_string":"Alexa AI-NU, Cambridge, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101906419","display_name":"Weitong Ruan","orcid":"https://orcid.org/0000-0003-1066-513X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weitong Ruan","raw_affiliation_strings":["Alexa AI-NU,Cambridge,USA","Alexa AI-NU, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Alexa AI-NU,Cambridge,USA","institution_ids":[]},{"raw_affiliation_string":"Alexa AI-NU, Cambridge, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043281010","display_name":"Chengwei Su","orcid":"https://orcid.org/0000-0003-1492-723X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chengwei Su","raw_affiliation_strings":["Alexa AI-NU,Cambridge,USA","Alexa AI-NU, Cambridge, USA"],"affiliations":[{"raw_affiliation_string":"Alexa AI-NU,Cambridge,USA","institution_ids":[]},{"raw_affiliation_string":"Alexa AI-NU, Cambridge, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5081500689"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2399,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.59974333,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8421441316604614},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6868586540222168},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5298293232917786},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.49769309163093567},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4887204170227051},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36831849813461304},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3570253252983093}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8421441316604614},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6868586540222168},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5298293232917786},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.49769309163093567},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4887204170227051},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36831849813461304},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3570253252983093},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892420","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892420","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5299999713897705,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2121879602","https://openalex.org/W2133280805","https://openalex.org/W2251512949","https://openalex.org/W2277195237","https://openalex.org/W2489434015","https://openalex.org/W2886641317","https://openalex.org/W2896457183","https://openalex.org/W2963037989","https://openalex.org/W2964345792","https://openalex.org/W2965373594","https://openalex.org/W2966715458","https://openalex.org/W2968124245","https://openalex.org/W2969876226","https://openalex.org/W2970231061","https://openalex.org/W2975501350","https://openalex.org/W2975706270","https://openalex.org/W2997591391","https://openalex.org/W3014611590","https://openalex.org/W3038476992","https://openalex.org/W3091588028","https://openalex.org/W3096609285","https://openalex.org/W3110570034","https://openalex.org/W3120237956","https://openalex.org/W3124149278","https://openalex.org/W3126792443","https://openalex.org/W3128099838","https://openalex.org/W3134095442","https://openalex.org/W3159619744","https://openalex.org/W3164540605","https://openalex.org/W3170874841","https://openalex.org/W3216991452","https://openalex.org/W4206706211","https://openalex.org/W4229494842","https://openalex.org/W6755207826","https://openalex.org/W6766673545","https://openalex.org/W6766904570","https://openalex.org/W6767211374","https://openalex.org/W6767279747","https://openalex.org/W6768438993","https://openalex.org/W6775188310","https://openalex.org/W6775970589","https://openalex.org/W6778485988","https://openalex.org/W6780137884","https://openalex.org/W6787034138","https://openalex.org/W6788135285","https://openalex.org/W6788554130","https://openalex.org/W6788556936","https://openalex.org/W6789705400","https://openalex.org/W6789753369","https://openalex.org/W6794797104","https://openalex.org/W6795467770"],"related_works":["https://openalex.org/W1556451512","https://openalex.org/W1555349535","https://openalex.org/W4234091740","https://openalex.org/W4213350282","https://openalex.org/W2230171082","https://openalex.org/W2583128298","https://openalex.org/W2022275305","https://openalex.org/W1604115909","https://openalex.org/W2369125128","https://openalex.org/W2134423494"],"abstract_inverted_index":{"In":[0,39],"recent":[1],"years,":[2],"Smart":[3,160],"Home":[4,161],"Assistants":[5,162],"have":[6,163],"expanded":[7],"into":[8],"tens":[9],"of":[10,12,63,116,134,177],"thousands":[11],"devices":[13],"and":[14,72,121,131,167,215],"transformed":[15],"from":[16],"a":[17,22,30,35,77,83,90,145,151,175,201,208],"voice":[18],"only":[19],"assistant":[20],"to":[21,33,41,69,99,113,174,188,198,207,213],"much":[23],"more":[24,140],"versatile":[25],"smart":[26],"assistant,":[27],"that":[28,52,87,158,210],"uses":[29],"connected":[31],"display":[32],"provide":[34],"multi-modal":[36],"customer":[37],"experience.":[38],"order":[40],"further":[42],"improve":[43,199],"on":[44,60,76,125,150],"the":[45,61,102,107,117,129,135,155,190],"multi-modality":[46],"experience,":[47],"comprehension":[48],"systems":[49],"need":[50],"models":[51],"can":[53],"work":[54],"with":[55,71,106],"multisensory":[56],"inputs.":[57],"We":[58,81],"focus":[59,173,185],"problem":[62],"visual":[64,103,195],"grounding,":[65],"which":[66,205],"allows":[67],"customers":[68],"interact":[70],"manipulate":[73],"items":[74],"displayed":[75],"screen":[78],"via":[79],"voice.":[80],"propose":[82],"novel":[84],"learning":[85],"approach":[86,111],"improves":[88],"upon":[89,200],"lightweight":[91,178,202],"single":[92,179],"stream":[93,180],"transformer":[94,181,203],"architecture":[95,204],"by":[96,144],"adjusting":[97],"it":[98],"better":[100],"align":[101],"input":[104],"features":[105],"referring":[108],"expressions.":[109],"Our":[110],"learns":[112],"cluster":[114],"parts":[115],"image":[118],"along":[119],"spatial":[120],"channel":[122],"dimensions":[123],"based":[124],"descriptive":[126],"attributes":[127],"in":[128,137,194],"query,":[130],"takes":[132],"advantage":[133],"information":[136],"separate":[138],"clusters":[139],"efficiently,":[141],"as":[142],"demonstrated":[143],"1.32%":[146],"absolute":[147],"accuracy":[148],"improvement":[149],"public":[152],"dataset":[153],"over":[154],"baseline.":[156],"Given":[157],"modern-day":[159],"very":[164],"stringent":[165],"memory":[166],"latency":[168],"requirements,":[169],"we":[170],"restrict":[171],"our":[172,184],"family":[176],"architectures":[182],"-":[183],"is":[186,211],"not":[187],"beat":[189],"ever":[191],"improving":[192],"state-of-the-art":[193],"grounding":[196],"but":[197],"leads":[206],"model":[209],"easy":[212],"train":[214],"deploy":[216],"while":[217],"having":[218],"improved":[219],"semantic":[220],"awareness.":[221]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
