{"id":"https://openalex.org/W2747886799","doi":"https://doi.org/10.21437/interspeech.2017-818","title":"A Batch Noise Contrastive Estimation Approach for Training Large Vocabulary Language Models","display_name":"A Batch Noise Contrastive Estimation Approach for Training Large Vocabulary Language Models","publication_year":2017,"publication_date":"2017-08-16","ids":{"openalex":"https://openalex.org/W2747886799","doi":"https://doi.org/10.21437/interspeech.2017-818","mag":"2747886799"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2017-818","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1708.05997","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014456106","display_name":"Youssef Oualil","orcid":"https://orcid.org/0009-0002-8528-3339"},"institutions":[{"id":"https://openalex.org/I91712215","display_name":"Saarland University","ror":"https://ror.org/01jdpyv68","country_code":"DE","type":"education","lineage":["https://openalex.org/I91712215"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Youssef Oualil","raw_affiliation_strings":["Saarland University, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Saarland University, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I91712215"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008875255","display_name":"Dietrich Klakow","orcid":"https://orcid.org/0000-0002-4147-9690"},"institutions":[{"id":"https://openalex.org/I91712215","display_name":"Saarland University","ror":"https://ror.org/01jdpyv68","country_code":"DE","type":"education","lineage":["https://openalex.org/I91712215"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dietrich Klakow","raw_affiliation_strings":["Saarland University, Saarbr\u00fccken, Germany"],"affiliations":[{"raw_affiliation_string":"Saarland University, Saarbr\u00fccken, Germany","institution_ids":["https://openalex.org/I91712215"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5014456106"],"corresponding_institution_ids":["https://openalex.org/I91712215"],"apc_list":null,"apc_paid":null,"fwci":1.4539,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86869616,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"264","last_page":"268"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.936267614364624},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7862086296081543},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6981281042098999},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5725314021110535},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5495331883430481},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.4899086356163025},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47190171480178833},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.46310967206954956},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4612720012664795},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44809088110923767},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32756179571151733},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08608141541481018}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.936267614364624},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7862086296081543},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6981281042098999},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5725314021110535},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5495331883430481},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.4899086356163025},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47190171480178833},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.46310967206954956},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4612720012664795},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44809088110923767},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32756179571151733},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08608141541481018},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2017-818","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2017-818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2017","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1708.05997","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1708.05997","pdf_url":"https://arxiv.org/pdf/1708.05997","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2747886799","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1708.05997.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1708.05997","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1708.05997","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1708.05997","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1708.05997","pdf_url":"https://arxiv.org/pdf/1708.05997","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.8100000023841858,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2747886799.pdf","grobid_xml":"https://content.openalex.org/works/W2747886799.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W36903255","https://openalex.org/W179875071","https://openalex.org/W1520465330","https://openalex.org/W1558797106","https://openalex.org/W1686810756","https://openalex.org/W1934041838","https://openalex.org/W2133564696","https://openalex.org/W2152808281","https://openalex.org/W2171928131","https://openalex.org/W2259472270","https://openalex.org/W2402268235","https://openalex.org/W2437096199","https://openalex.org/W2950797609","https://openalex.org/W2951714314","https://openalex.org/W2963160216"],"related_works":["https://openalex.org/W1520465330","https://openalex.org/W3213809193","https://openalex.org/W2000626086","https://openalex.org/W2137309058","https://openalex.org/W2348615302","https://openalex.org/W2964030816","https://openalex.org/W3160648428","https://openalex.org/W2225246819","https://openalex.org/W2963747784","https://openalex.org/W3191747585","https://openalex.org/W2133564850","https://openalex.org/W2338891360","https://openalex.org/W2945797851","https://openalex.org/W2091544142","https://openalex.org/W2890220768","https://openalex.org/W1891637836","https://openalex.org/W2127824378","https://openalex.org/W2980849940","https://openalex.org/W2149215887","https://openalex.org/W2936078256"],"abstract_inverted_index":{"Training":[0],"large":[1,162],"vocabulary":[2],"Neural":[3],"Network":[4],"Language":[5],"Models":[6],"(NNLMs)":[7],"is":[8,51],"a":[9,39,132,151,165],"difficult":[10],"task":[11],"due":[12],"to":[13,46,61,113],"the":[14,18,25,28,33,55,62,66,71,74,83,91,97,118,125,136,144,161],"explicit":[15],"requirement":[16],"of":[17,27,85,135,143,156],"output":[19],"layer":[20],"normalization,":[21],"which":[22],"typically":[23],"involves":[24],"evaluation":[26],"full":[29],"softmax":[30,72],"function":[31],"over":[32],"complete":[34],"vocabulary.":[35],"This":[36,50,147],"paper":[37,148],"proposes":[38],"Batch":[40],"Noise":[41],"Contrastive":[42],"Estimation":[43],"(B-NCE)":[44],"approach":[45,99],"alleviate":[47],"this":[48],"problem.":[49],"achieved":[52],"by":[53,73],"reducing":[54],"vocabulary,":[56],"at":[57,90],"each":[58],"time":[59,138],"step,":[60],"target":[63],"words":[64,81],"in":[65],"batch":[67],"and":[68,87,104,124],"then":[69],"replacing":[70],"noise":[75,88],"contrastive":[76],"estimation":[77],"approach,":[78],"where":[79],"these":[80],"play":[82],"role":[84],"targets":[86],"samples":[89],"same":[92],"time.":[93],"In":[94],"doing":[95],"so,":[96],"proposed":[98],"can":[100],"be":[101],"fully":[102],"formulated":[103],"implemented":[105],"using":[106],"optimal":[107],"dense":[108],"matrix":[109],"operations.":[110],"Applying":[111],"B-NCE":[112],"train":[114],"different":[115,157],"NNLMs":[116,159],"on":[117,160,164],"Large":[119],"Text":[120],"Compression":[121],"Benchmark":[122,129],"(LTCB)":[123],"One":[126],"Billion":[127],"Word":[128],"(OBWB)":[130],"shows":[131],"significant":[133],"reduction":[134],"training":[137],"with":[139],"no":[140],"noticeable":[141],"degradation":[142],"models":[145],"performance.":[146],"also":[149],"presents":[150],"new":[152],"baseline":[153],"comparative":[154],"study":[155],"standard":[158],"OBWB":[163],"single":[166],"Titan-X":[167],"GPU.":[168]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
