{"id":"https://openalex.org/W2808286951","doi":"https://doi.org/10.21437/sltu.2018-53","title":"Visually Grounded Cross-Lingual Keyword Spotting in Speech","display_name":"Visually Grounded Cross-Lingual Keyword Spotting in Speech","publication_year":2018,"publication_date":"2018-08-29","ids":{"openalex":"https://openalex.org/W2808286951","doi":"https://doi.org/10.21437/sltu.2018-53","mag":"2808286951"},"language":"en","primary_location":{"id":"doi:10.21437/sltu.2018-53","is_oa":false,"landing_page_url":"https://doi.org/10.21437/sltu.2018-53","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"6th Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU 2018)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1806.05030","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040305929","display_name":"Herman Kamper","orcid":"https://orcid.org/0000-0003-2980-3475"},"institutions":[{"id":"https://openalex.org/I26092322","display_name":"Stellenbosch University","ror":"https://ror.org/05bk57929","country_code":"ZA","type":"education","lineage":["https://openalex.org/I26092322"]}],"countries":["ZA"],"is_corresponding":true,"raw_author_name":"Herman Kamper","raw_affiliation_strings":["Stellenbosch University, Stellenbosch, South Africa"],"affiliations":[{"raw_affiliation_string":"Stellenbosch University, Stellenbosch, South Africa","institution_ids":["https://openalex.org/I26092322"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025079639","display_name":"Michael Roth","orcid":"https://orcid.org/0000-0002-9128-519X"},"institutions":[{"id":"https://openalex.org/I91712215","display_name":"Saarland University","ror":"https://ror.org/01jdpyv68","country_code":"DE","type":"education","lineage":["https://openalex.org/I91712215"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Michael Roth","raw_affiliation_strings":["Saarland University, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Saarland University, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I91712215"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5040305929"],"corresponding_institution_ids":["https://openalex.org/I26092322"],"apc_list":null,"apc_paid":null,"fwci":0.1062,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.43641448,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"253","last_page":"257"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.95904940366745},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.861022412776947},{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.7004895806312561},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6716474294662476},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5413799285888672},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5386438369750977},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5365142226219177},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.4745129942893982},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4149213135242462},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.4128918945789337},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.2605612277984619},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1964530646800995}],"concepts":[{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.95904940366745},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.861022412776947},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.7004895806312561},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6716474294662476},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5413799285888672},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5386438369750977},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5365142226219177},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.4745129942893982},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4149213135242462},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.4128918945789337},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.2605612277984619},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1964530646800995},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/sltu.2018-53","is_oa":false,"landing_page_url":"https://doi.org/10.21437/sltu.2018-53","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"6th Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU 2018)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1806.05030","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1806.05030","pdf_url":"https://arxiv.org/pdf/1806.05030","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2808286951","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1806.05030","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1806.05030","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1806.05030","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1806.05030","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1806.05030","pdf_url":"https://arxiv.org/pdf/1806.05030","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.800000011920929}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2808286951.pdf","grobid_xml":"https://content.openalex.org/works/W2808286951.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2744487470","https://openalex.org/W2586236392","https://openalex.org/W2113226971","https://openalex.org/W2036025621","https://openalex.org/W2900378279","https://openalex.org/W2109026138","https://openalex.org/W103463988","https://openalex.org/W2886202000","https://openalex.org/W2610983683","https://openalex.org/W2167235186","https://openalex.org/W3032541519","https://openalex.org/W2089316759","https://openalex.org/W3115650992","https://openalex.org/W2115513413","https://openalex.org/W2992245086","https://openalex.org/W2075737140","https://openalex.org/W3131542548","https://openalex.org/W3173025111","https://openalex.org/W3022136768","https://openalex.org/W2129059886"],"abstract_inverted_index":{"Recent":[0],"work":[1],"considered":[2],"how":[3],"images":[4],"paired":[5],"with":[6,78],"speech":[7,15,59,77,104],"can":[8,27],"be":[9,28],"used":[10,29],"as":[11],"supervision":[12],"for":[13,30],"building":[14],"systems":[16],"when":[17],"transcriptions":[18],"are":[19],"not":[20],"available.":[21],"We":[22,123],"ask":[23],"whether":[24],"visual":[25,85],"grounding":[26],"cross-lingual":[31],"keyword":[32,37,50,89],"spotting:":[33],"given":[34],"a":[35,61,68,72,83,98,117],"text":[36,65],"in":[38,51,60,67],"one":[39],"language,":[40],"the":[41,114],"task":[42],"is":[43],"to":[44,87,91,101,105,140],"retrieve":[45],"spoken":[46],"utterances":[47],"containing":[48],"that":[49,125],"another":[52],"language.":[53,70],"This":[54],"could":[55],"enable":[56],"searching":[57],"through":[58],"low-resource":[62],"language":[63],"using":[64],"queries":[66],"high-resource":[69],"As":[71],"proof-of-concept,":[73],"we":[74,81],"use":[75,82],"English":[76,103],"German":[79,84,106],"queries:":[80],"tagger":[86],"add":[88],"labels":[90],"each":[92],"training":[93],"image,":[94],"and":[95],"then":[96],"train":[97],"neural":[99],"network":[100],"map":[102],"keywords.":[107],"Without":[108],"seeing":[109],"parallel":[110],"speech-transcriptions":[111],"or":[112,131],"translations,":[113],"model":[115],"achieves":[116],"precision":[118],"at":[119],"ten":[120],"of":[121],"58%.":[122],"show":[124],"most":[126],"erroneous":[127],"retrievals":[128],"contain":[129],"equivalent":[130],"semantically":[132],"relevant":[133],"keywords;":[134],"excluding":[135],"these":[136],"would":[137],"improve":[138],"P@10":[139],"91%.":[141]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
