{"id":"https://openalex.org/W4395470973","doi":"https://doi.org/10.1109/taslp.2024.3393772","title":"Visually Grounded Few-Shot Word Learning in Low-Resource Settings","display_name":"Visually Grounded Few-Shot Word Learning in Low-Resource Settings","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4395470973","doi":"https://doi.org/10.1109/taslp.2024.3393772"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3393772","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3393772","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.1109/TASLP.2024.3393772","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020033363","display_name":"Leanne Nortje","orcid":null},"institutions":[{"id":"https://openalex.org/I26092322","display_name":"Stellenbosch University","ror":"https://ror.org/05bk57929","country_code":"ZA","type":"education","lineage":["https://openalex.org/I26092322"]}],"countries":["ZA"],"is_corresponding":true,"raw_author_name":"Leanne Nortje","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Stellenbosch University, Stellenbosch, South Africa","Department of Electrical and Electronic Engineering,, Stellenbosch University, Stellenbosch, South Africa"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Stellenbosch University, Stellenbosch, South Africa","institution_ids":["https://openalex.org/I26092322"]},{"raw_affiliation_string":"Department of Electrical and Electronic Engineering,, Stellenbosch University, Stellenbosch, South Africa","institution_ids":["https://openalex.org/I26092322"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013389452","display_name":"Dan Onea\u0163\u0103","orcid":"https://orcid.org/0000-0003-4354-4393"},"institutions":[{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Dan Onea\u0163\u0103","raw_affiliation_strings":["University Politehnica of Bucharest, Bucharest, Romania"],"affiliations":[{"raw_affiliation_string":"University Politehnica of Bucharest, Bucharest, Romania","institution_ids":["https://openalex.org/I61641377"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040305929","display_name":"Herman Kamper","orcid":"https://orcid.org/0000-0003-2980-3475"},"institutions":[{"id":"https://openalex.org/I26092322","display_name":"Stellenbosch University","ror":"https://ror.org/05bk57929","country_code":"ZA","type":"education","lineage":["https://openalex.org/I26092322"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Herman Kamper","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Stellenbosch University, Stellenbosch, South Africa","Department of Electrical and Electronic Engineering,, Stellenbosch University, Stellenbosch, South Africa"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Stellenbosch University, Stellenbosch, South Africa","institution_ids":["https://openalex.org/I26092322"]},{"raw_affiliation_string":"Department of Electrical and Electronic Engineering,, Stellenbosch University, Stellenbosch, South Africa","institution_ids":["https://openalex.org/I26092322"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5020033363"],"corresponding_institution_ids":["https://openalex.org/I26092322"],"apc_list":null,"apc_paid":null,"fwci":1.0526,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.76474179,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"2544","last_page":"2554"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6149281859397888},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.6123970746994019},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5954769253730774},{"id":"https://openalex.org/keywords/grounded-theory","display_name":"Grounded theory","score":0.56195068359375},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4606812596321106},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.33721548318862915},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3343557119369507},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32047271728515625},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.2356310486793518},{"id":"https://openalex.org/keywords/qualitative-research","display_name":"Qualitative research","score":0.19991931319236755},{"id":"https://openalex.org/keywords/social-science","display_name":"Social science","score":0.060990750789642334}],"concepts":[{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6149281859397888},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.6123970746994019},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5954769253730774},{"id":"https://openalex.org/C156325361","wikidata":"https://www.wikidata.org/wiki/Q1152864","display_name":"Grounded theory","level":3,"score":0.56195068359375},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4606812596321106},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.33721548318862915},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3343557119369507},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32047271728515625},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.2356310486793518},{"id":"https://openalex.org/C190248442","wikidata":"https://www.wikidata.org/wiki/Q839486","display_name":"Qualitative research","level":2,"score":0.19991931319236755},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.060990750789642334},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2024.3393772","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3393772","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:zenodo.org:13985458","is_oa":true,"landing_page_url":"https://doi.org/10.1109/TASLP.2024.3393772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing, 32, 2544-2554, (2024-04-25)","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:13985458","is_oa":true,"landing_page_url":"https://doi.org/10.1109/TASLP.2024.3393772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing, 32, 2544-2554, (2024-04-25)","raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W68733909","https://openalex.org/W1494198834","https://openalex.org/W1861492603","https://openalex.org/W2013596317","https://openalex.org/W2067816745","https://openalex.org/W2074231493","https://openalex.org/W2091746061","https://openalex.org/W2108598243","https://openalex.org/W2132730112","https://openalex.org/W2194775991","https://openalex.org/W2415378728","https://openalex.org/W2618530766","https://openalex.org/W2747874407","https://openalex.org/W2802557066","https://openalex.org/W2889313720","https://openalex.org/W2938991416","https://openalex.org/W2950133079","https://openalex.org/W2962862718","https://openalex.org/W2963330681","https://openalex.org/W2963571336","https://openalex.org/W2963720603","https://openalex.org/W2963902314","https://openalex.org/W2964249784","https://openalex.org/W3049010983","https://openalex.org/W3095361818","https://openalex.org/W3102219307","https://openalex.org/W3131709275","https://openalex.org/W3146777637","https://openalex.org/W3157861865","https://openalex.org/W3174311593","https://openalex.org/W3198134274","https://openalex.org/W3198411039","https://openalex.org/W3198749384","https://openalex.org/W3209059054","https://openalex.org/W4234899042","https://openalex.org/W4297841895","https://openalex.org/W4306317873","https://openalex.org/W4319862234","https://openalex.org/W4319862278","https://openalex.org/W4372267276","https://openalex.org/W4385807432","https://openalex.org/W4385823277","https://openalex.org/W6631190155","https://openalex.org/W6677994088","https://openalex.org/W6714354170","https://openalex.org/W6729977899"],"related_works":["https://openalex.org/W1546533203","https://openalex.org/W2054080977","https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W1511554945","https://openalex.org/W2015439768","https://openalex.org/W4361008414","https://openalex.org/W4396854307","https://openalex.org/W3040823075","https://openalex.org/W2296205523"],"abstract_inverted_index":{"We":[0,80],"propose":[1,81],"a":[2,17,23,29,63,112,144,198,203],"visually":[3],"grounded":[4],"speech":[5,138],"model":[6,35,200],"that":[7,84],"learns":[8],"new":[9,128,154],"words":[10],"and":[11,28,98,139],"their":[12],"visual":[13,181],"depictions":[14],"from":[15,133,197],"just":[16],"few":[18],"wordimage":[19],"example":[20,124],"pairs.":[21],"Given":[22],"set":[24,205],"of":[25,66,136,172,194,206],"test":[26],"images":[27],"spoken":[30],"query,":[31],"we":[32,142,156],"ask":[33],"the":[34,39,121,173,192],"which":[36],"image":[37],"depicts":[38],"query":[40],"word.":[41],"Previous":[42],"work":[43,86],"has":[44],"simplified":[45],"this":[46,102,153],"few-shot":[47,109],"learning":[48,110],"problem":[49],"by":[50,61],"either":[51],"using":[52,62,76,120],"an":[53,82,167],"artificial":[54],"setting":[55],"with":[56,92,160],"digit":[57],"word-image":[58,89,123,130,150],"pairs":[59,90,125,132],"or":[60],"large":[64,134],"number":[65],"examples":[67],"per":[68],"class.":[69],"Moreover,":[70],"all":[71],"previous":[72,164],"studies":[73],"were":[74],"performed":[75],"English":[77,169,207],"speech-image":[78,208],"data.":[79,209],"approach":[83,103,118],"can":[85,104],"on":[87,166,189,202],"natural":[88],"but":[91],"less":[93],"examples,":[94],"i.e.":[95],"fewer":[96,161],"shots,":[97],"then":[99],"illustrate":[100],"how":[101],"be":[105],"applied":[106],"for":[107],"multimodal":[108,199],"in":[111,184],"real":[113],"low-resource":[114],"language,":[115],"Yor\u00f9\u00e1.":[116],"Our":[117],"involves":[119],"given":[122],"to":[126,148,178],"mine":[127],"unsupervised":[129],"training":[131],"collections":[135],"unlabelled":[137],"images.":[140],"Additionally,":[141],"use":[143],"word-to-image":[145],"attention":[146],"mechanism":[147],"determine":[149],"similarity.":[151],"With":[152],"model,":[155],"achieve":[157],"better":[158],"performance":[159],"shots":[162],"than":[163],"approaches":[165],"existing":[168],"benchmark.":[170],"Many":[171],"model's":[174],"mistakes":[175],"are":[176],"due":[177],"confusion":[179],"between":[180],"concepts":[182],"co-occurring":[183],"similar":[185],"contexts.":[186],"The":[187],"experiments":[188],"Yor\u00f9\u00e1":[190],"show":[191],"benefit":[193],"transferring":[195],"knowledge":[196],"trained":[201],"larger":[204],"<sup":[210],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[211],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[212]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
