{"id":"https://openalex.org/W2891499495","doi":"https://doi.org/10.18653/v1/d18-2016","title":"KT-Speech-Crawler: Automatic Dataset Construction for Speech Recognition from YouTube Videos","display_name":"KT-Speech-Crawler: Automatic Dataset Construction for Speech Recognition from YouTube Videos","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2891499495","doi":"https://doi.org/10.18653/v1/d18-2016","mag":"2891499495"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d18-2016","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-2016","pdf_url":"https://www.aclweb.org/anthology/D18-2016.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D18-2016.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045428440","display_name":"Egor Lakomkin","orcid":null},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Egor Lakomkin","raw_affiliation_strings":["Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015567592","display_name":"Sven Magg","orcid":"https://orcid.org/0000-0002-0589-6585"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sven Magg","raw_affiliation_strings":["Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102025003","display_name":"Cornelius Weber","orcid":"https://orcid.org/0000-0001-5163-938X"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Cornelius Weber","raw_affiliation_strings":["Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033486668","display_name":"Stefan Wermter","orcid":"https://orcid.org/0000-0003-1343-4775"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Wermter","raw_affiliation_strings":["Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Knowledge Technology University of Hamburg Vogt-Koelln Str. 30, 22527 Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5045428440"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":1.4925,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.83809026,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"90","last_page":"95"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7972126007080078},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.747955322265625},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.7433584928512573},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.7054393291473389},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.5968650579452515},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.5659878253936768},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5108591914176941},{"id":"https://openalex.org/keywords/speech-analytics","display_name":"Speech analytics","score":0.48568400740623474},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48228719830513},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4723286032676697},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.46698468923568726},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4362228214740753},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.42312490940093994},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.342764675617218},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.081332266330719},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07282394170761108}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7972126007080078},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.747955322265625},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.7433584928512573},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.7054393291473389},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.5968650579452515},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.5659878253936768},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5108591914176941},{"id":"https://openalex.org/C54953205","wikidata":"https://www.wikidata.org/wiki/Q4142201","display_name":"Speech analytics","level":4,"score":0.48568400740623474},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48228719830513},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4723286032676697},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.46698468923568726},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4362228214740753},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.42312490940093994},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.342764675617218},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.081332266330719},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07282394170761108},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/d18-2016","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-2016","pdf_url":"https://www.aclweb.org/anthology/D18-2016.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/d18-2016","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-2016","pdf_url":"https://www.aclweb.org/anthology/D18-2016.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5299999713897705,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G5399556803","display_name":null,"funder_award_id":"This project has received funding from the Europea","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2891499495.pdf","grobid_xml":"https://content.openalex.org/works/W2891499495.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W97072897","https://openalex.org/W108866686","https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W1710082047","https://openalex.org/W1922655562","https://openalex.org/W1924770834","https://openalex.org/W2033256038","https://openalex.org/W2076048613","https://openalex.org/W2127141656","https://openalex.org/W2160815625","https://openalex.org/W2164579587","https://openalex.org/W2193413348","https://openalex.org/W2251321385","https://openalex.org/W2327501763","https://openalex.org/W2465534249","https://openalex.org/W2619947201","https://openalex.org/W2963199341","https://openalex.org/W2964292728"],"related_works":["https://openalex.org/W3119324922","https://openalex.org/W2352686120","https://openalex.org/W2372594123","https://openalex.org/W2358310581","https://openalex.org/W2964752624","https://openalex.org/W2026132847","https://openalex.org/W4385695127","https://openalex.org/W2137810919","https://openalex.org/W2089702591","https://openalex.org/W4255854114"],"abstract_inverted_index":{"In":[0,40],"this":[1],"paper,":[2],"we":[3,43],"describe":[4],"KT-Speech-Crawler:":[5],"an":[6,64],"approach":[7],"for":[8,12,33],"automatic":[9],"dataset":[10],"construction":[11],"speech":[13,37,59],"recognition":[14,38],"by":[15],"crawling":[16],"YouTube":[17],"videos.":[18],"We":[19],"outline":[20],"several":[21],"filtering":[22],"and":[23],"postprocessing":[24],"steps,":[25],"which":[26],"extract":[27],"samples":[28],"that":[29,45],"can":[30,52],"be":[31],"used":[32],"training":[34],"end-to-end":[35],"neural":[36],"systems.":[39],"our":[41],"experiments,":[42],"demonstrate":[44],"a":[46,61],"single-core":[47],"version":[48],"of":[49,57],"the":[50,71],"crawler":[51],"obtain":[53],"around":[54],"150":[55],"hours":[56],"transcribed":[58],"within":[60],"day,":[62],"containing":[63],"estimated":[65],"3.5%":[66],"word":[67],"error":[68],"rate":[69],"in":[70],"transcriptions.":[72]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":2}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
