{"id":"https://openalex.org/W2151166364","doi":"https://doi.org/10.5555/2627435.2638582","title":"Training highly multiclass classifiers","display_name":"Training highly multiclass classifiers","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2151166364","doi":"https://doi.org/10.5555/2627435.2638582","mag":"2151166364"},"language":"en","primary_location":{"id":"pmh:oai:CiteSeerX.psu:10.1.1.648.6831","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.648.6831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://bengio.abracadoudou.com/cv/publications/pdf/gupta_2014_jmlr.pdf","raw_type":"text"},"type":"article","indexed_in":[],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111921762","display_name":"Maya R. Gupta","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Maya R. Gupta","raw_affiliation_strings":["Google, Inc, Mountain View, CA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Inc, Mountain View, CA#TAB#","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017529415","display_name":"Samy Bengio","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samy Bengio","raw_affiliation_strings":["Google, Inc, Mountain View, CA#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Inc, Mountain View, CA#TAB#","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076635608","display_name":"Jason Weston","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Weston","raw_affiliation_strings":["Google Inc, New York, NY"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Inc, New York, NY","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5111921762"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":12.2625,"has_fulltext":false,"cited_by_count":72,"citation_normalized_percentile":{"value":0.9865887,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":"15","issue":"1","first_page":"1461","last_page":"1492"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6655986309051514},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6365702152252197},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.6269515752792358},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6059072017669678},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5754002332687378},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5605876445770264},{"id":"https://openalex.org/keywords/multiclass-classification","display_name":"Multiclass classification","score":0.5094075798988342},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5058352947235107},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5034942030906677},{"id":"https://openalex.org/keywords/linear-classifier","display_name":"Linear classifier","score":0.5032142996788025},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.46752265095710754},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.44256356358528137},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43610483407974243},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.10758334398269653}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6655986309051514},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6365702152252197},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.6269515752792358},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6059072017669678},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5754002332687378},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5605876445770264},{"id":"https://openalex.org/C123860398","wikidata":"https://www.wikidata.org/wiki/Q6934605","display_name":"Multiclass classification","level":3,"score":0.5094075798988342},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5058352947235107},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5034942030906677},{"id":"https://openalex.org/C139532973","wikidata":"https://www.wikidata.org/wiki/Q2679259","display_name":"Linear classifier","level":3,"score":0.5032142996788025},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.46752265095710754},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.44256356358528137},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43610483407974243},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.10758334398269653},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:CiteSeerX.psu:10.1.1.648.6831","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.648.6831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://bengio.abracadoudou.com/cv/publications/pdf/gupta_2014_jmlr.pdf","raw_type":"text"},{"id":"mag:2151166364","is_oa":false,"landing_page_url":"http://jmlr.org/papers/volume15/gupta14a/gupta14a.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S118988714","display_name":"Journal of Machine Learning Research","issn_l":"1532-4435","issn":["1532-4435","1533-7928"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Journal of Machine Learning Research","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1944672","https://openalex.org/W2169610","https://openalex.org/W21006490","https://openalex.org/W1480376833","https://openalex.org/W1480538416","https://openalex.org/W1494922272","https://openalex.org/W1515999713","https://openalex.org/W1602492977","https://openalex.org/W1676820704","https://openalex.org/W1959000896","https://openalex.org/W1987063155","https://openalex.org/W2000655290","https://openalex.org/W2011058684","https://openalex.org/W2029727948","https://openalex.org/W2053229256","https://openalex.org/W2072128103","https://openalex.org/W2096199223","https://openalex.org/W2096520669","https://openalex.org/W2100743709","https://openalex.org/W2101276256","https://openalex.org/W2106578604","https://openalex.org/W2108598243","https://openalex.org/W2115358726","https://openalex.org/W2116339064","https://openalex.org/W2116444583","https://openalex.org/W2127279985","https://openalex.org/W2128017662","https://openalex.org/W2128154306","https://openalex.org/W2133864802","https://openalex.org/W2136158847","https://openalex.org/W2146396044","https://openalex.org/W2146502635","https://openalex.org/W2148603752","https://openalex.org/W2150385485","https://openalex.org/W2155144632","https://openalex.org/W2157065343","https://openalex.org/W2157791002","https://openalex.org/W2160218441","https://openalex.org/W2162867699","https://openalex.org/W2163605009","https://openalex.org/W2168231600","https://openalex.org/W2282078507","https://openalex.org/W2296073425","https://openalex.org/W2435338979","https://openalex.org/W2899606262","https://openalex.org/W2950171555","https://openalex.org/W2964231757","https://openalex.org/W2988119488","https://openalex.org/W3002694247"],"related_works":["https://openalex.org/W2157791002","https://openalex.org/W2108598243","https://openalex.org/W2153635508","https://openalex.org/W2133864802","https://openalex.org/W2116444583","https://openalex.org/W2163605009","https://openalex.org/W2117539524","https://openalex.org/W2101276256","https://openalex.org/W2194775991","https://openalex.org/W2164278908","https://openalex.org/W2112796928","https://openalex.org/W21006490","https://openalex.org/W1987063155","https://openalex.org/W1509803206","https://openalex.org/W1480538416","https://openalex.org/W2753797983","https://openalex.org/W2120725344","https://openalex.org/W174416256","https://openalex.org/W3097199945","https://openalex.org/W2267727210"],"abstract_inverted_index":{"Classification":[0],"problems":[1,121],"with":[2,122],"thousands":[3],"or":[4],"more":[5,22],"classes":[6,20,45,58,126],"often":[7],"have":[8],"a":[9],"large":[10],"range":[11],"of":[12,42,57,100],"class-confusabilities,":[13],"and":[14,51,104,117,138],"we":[15],"show":[16,73,127],"that":[17,28,38,59,74],"the":[18,25,40,48,53,69,75,96],"more-confusable":[19],"add":[21],"noise":[23],"to":[24,62,95,124,134],"em-pirical":[26],"loss":[27],"is":[29],"minimized":[30],"during":[31],"training.":[32,70],"We":[33,71],"propose":[34],"an":[35],"online":[36],"solution":[37],"reduces":[39],"effect":[41],"highly":[43],"confusable":[44],"in":[46,68,109,130],"training":[47,54,99],"classifier":[49],"parameters,":[50],"focuses":[52],"on":[55,112],"pairs":[56],"are":[60],"easier":[61],"differentiate":[63],"at":[64],"any":[65],"given":[66],"time":[67],"also":[72,91],"adagrad":[76],"method,":[77],"recently":[78],"proposed":[79],"for":[80,85],"automatically":[81],"decreasing":[82],"step":[83],"sizes":[84],"convex":[86],"stochastic":[87],"gradient":[88],"descent,":[89],"can":[90],"be":[92],"profitably":[93],"applied":[94],"non-convex":[97],"joint":[98],"supervised":[101],"dimensionality":[102],"reduction":[103],"linear":[105,136],"classifiers":[106],"as":[107],"done":[108],"Wsabie.":[110,139],"Experiments":[111],"ImageNet":[113],"benchmark":[114],"data":[115],"sets":[116],"proprietary":[118],"image":[119],"recogni-tion":[120],"15,000":[123],"97,000":[125],"substantial":[128],"gains":[129],"classification":[131],"accuracy":[132],"compared":[133],"one-vs-all":[135],"SVMs":[137]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":14},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":3}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
