{"id":"https://openalex.org/W4402594377","doi":"https://doi.org/10.1109/avss61716.2024.10672572","title":"Grounded Language Acquisition from Object and Action Imagery","display_name":"Grounded Language Acquisition from Object and Action Imagery","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402594377","doi":"https://doi.org/10.1109/avss61716.2024.10672572"},"language":"en","primary_location":{"id":"doi:10.1109/avss61716.2024.10672572","is_oa":false,"landing_page_url":"https://doi.org/10.1109/avss61716.2024.10672572","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068691088","display_name":"James Kubricht","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"James Kubricht","raw_affiliation_strings":["GE Aerospace Research"],"affiliations":[{"raw_affiliation_string":"GE Aerospace Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056835845","display_name":"Zhaoyuan Yang","orcid":"https://orcid.org/0000-0003-0011-0093"},"institutions":[{"id":"https://openalex.org/I4401726784","display_name":"GE Vernova (United States)","ror":"https://ror.org/04n60x970","country_code":null,"type":"company","lineage":["https://openalex.org/I1332737386","https://openalex.org/I4401726784"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Zhaoyuan Yang","raw_affiliation_strings":["GE Vernova Research"],"affiliations":[{"raw_affiliation_string":"GE Vernova Research","institution_ids":["https://openalex.org/I4401726784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110466155","display_name":"Jianwei Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I189448455","display_name":"General Electric (Spain)","ror":"https://ror.org/04gbh9b75","country_code":"ES","type":"company","lineage":["https://openalex.org/I1332737386","https://openalex.org/I189448455"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jianwei Qiu","raw_affiliation_strings":["GE HealthCare Research"],"affiliations":[{"raw_affiliation_string":"GE HealthCare Research","institution_ids":["https://openalex.org/I189448455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113686023","display_name":"Peter Tu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peter Tu","raw_affiliation_strings":["GE Aerospace Research"],"affiliations":[{"raw_affiliation_string":"GE Aerospace Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5068691088"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3637,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.66764452,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.753000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.753000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6902388334274292},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.594101071357727},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5918110013008118},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4055938124656677},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3859630525112152},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.368344247341156},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.32883691787719727}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6902388334274292},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.594101071357727},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5918110013008118},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4055938124656677},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3859630525112152},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.368344247341156},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.32883691787719727},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/avss61716.2024.10672572","is_oa":false,"landing_page_url":"https://doi.org/10.1109/avss61716.2024.10672572","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2096733369","https://openalex.org/W2187089797","https://openalex.org/W2251410821","https://openalex.org/W2466618734","https://openalex.org/W2888912391","https://openalex.org/W2962858109","https://openalex.org/W2963681240","https://openalex.org/W2997964416","https://openalex.org/W3010312500","https://openalex.org/W3016009659","https://openalex.org/W3016815617","https://openalex.org/W3027465653","https://openalex.org/W3034622111","https://openalex.org/W3080094092","https://openalex.org/W3092374087","https://openalex.org/W3095766503","https://openalex.org/W3103247365","https://openalex.org/W3164271814","https://openalex.org/W3196684543","https://openalex.org/W4226278401","https://openalex.org/W4251051282","https://openalex.org/W4287765185","https://openalex.org/W4362515116","https://openalex.org/W6738795958","https://openalex.org/W6778886082","https://openalex.org/W6782322898","https://openalex.org/W6810738896","https://openalex.org/W6851775633"],"related_works":["https://openalex.org/W2737719445","https://openalex.org/W4239098401","https://openalex.org/W2898210368","https://openalex.org/W2382480268","https://openalex.org/W1976518449","https://openalex.org/W2732837990","https://openalex.org/W2363366881","https://openalex.org/W4206198161","https://openalex.org/W2363276194","https://openalex.org/W2604548540"],"abstract_inverted_index":{"Deep":[0],"learning":[1,70],"approaches":[2],"to":[3,90,98,147,166,187,191],"natural":[4],"language":[5,47,55],"processing":[6],"have":[7,17],"made":[8],"great":[9],"strides":[10],"in":[11,32,58,105,154,178,198],"recent":[12],"years.":[13],"While":[14],"transformer":[15],"models":[16],"demonstrated":[18],"impressive":[19],"knowledge":[20],"and":[21,66,85,112,131,201],"reasoning":[22],"capabilities,":[23],"it":[24],"is":[25],"unclear":[26],"how":[27],"produced":[28,122,151],"symbols":[29,150,177,185],"are":[30],"grounded":[31],"data":[33,50,153],"from":[34,126,140],"the":[35,42,149,208],"world.":[36],"In":[37,145],"this":[38,203],"paper,":[39],"we":[40],"explore":[41],"development":[43],"of":[44,95,120,210],"a":[45,61,68,73,118,157,211],"private":[46],"for":[48,132,152],"visual":[49],"representation":[51],"by":[52,123],"training":[53,76],"emergent":[54],"(EL)":[56],"encoders/decoders":[57],"both":[59],"i)":[60,184],"traditional":[62],"referential":[63],"game":[64],"environment":[65,71],"ii)":[67,202],"contrastive":[69],"utilizing":[72],"within-class":[74],"matching":[75],"paradigm.":[77],"An":[78],"additional":[79],"classification":[80],"layer-utilizing":[81],"neural":[82],"machine":[83],"translation":[84],"random":[86],"forest":[87],"classification-was":[88],"used":[89,130,165,186],"transform":[91],"symbolic":[92],"representations":[93],"(sequences":[94],"integer":[96],"symbols)":[97],"class":[99],"labels.":[100],"These":[101],"methods":[102],"were":[103,138],"applied":[104],"two":[106],"experiments":[107],"focusing":[108],"on":[109],"object":[110,116],"recognition":[111],"action":[113,133],"recognition.":[114],"For":[115],"recognition,":[117,134],"set":[119],"sketches":[121],"human":[124],"participants":[125],"real":[127],"imagery":[128],"was":[129,164],"2D":[135],"trajectory":[136],"images":[137,189],"generated":[139],"3D":[141],"motion":[142],"capture":[143],"systems.":[144],"order":[146],"interpret":[148],"each":[155],"experiment,":[156],"Gradient-weighted":[158],"Class":[159],"Activation":[160],"Mapping":[161],"(GradCAM)":[162],"method":[163],"identify":[167],"pixel":[168],"regions":[169],"indicating":[170],"semantic":[171,196],"features":[172],"which":[173],"contribute":[174],"evidence":[175],"towards":[176],"learned":[179],"languages.":[180],"Results":[181],"indicate":[182],"that:":[183],"represent":[188],"appear":[190],"shift":[192,204],"focus":[193],"between":[194],"different":[195],"components":[197],"an":[199],"image":[200],"occurs":[205],"gradually":[206],"over":[207],"course":[209],"sentence.":[212]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
