{"id":"https://openalex.org/W2975491454","doi":"https://doi.org/10.1109/isit.2019.8849289","title":"Local Geometry of Cross Entropy Loss in Learning One-Hidden-Layer Neural Networks","display_name":"Local Geometry of Cross Entropy Loss in Learning One-Hidden-Layer Neural Networks","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2975491454","doi":"https://doi.org/10.1109/isit.2019.8849289","mag":"2975491454"},"language":"en","primary_location":{"id":"doi:10.1109/isit.2019.8849289","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit.2019.8849289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013839479","display_name":"Haoyu Fu","orcid":"https://orcid.org/0000-0002-2108-7729"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoyu Fu","raw_affiliation_strings":["Dept. of ECE, The Ohio State University, Columbus, OH, 43210, USA","Department of ECE, The Ohio State University, Columbus, OH 43210, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of ECE, The Ohio State University, Columbus, OH, 43210, USA","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"Department of ECE, The Ohio State University, Columbus, OH 43210, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053809095","display_name":"Yuejie Chi","orcid":"https://orcid.org/0000-0002-6766-5459"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuejie Chi","raw_affiliation_strings":["Dept. of ECE, Carnegie Mellon University, Pittsburgh, PA, 15213, USA","Dept. of ECE, Carnegie Mellon University, Pittsburgh, PA 15213, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of ECE, Carnegie Mellon University, Pittsburgh, PA, 15213, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Dept. of ECE, Carnegie Mellon University, Pittsburgh, PA 15213, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100384384","display_name":"Yingbin Liang","orcid":"https://orcid.org/0000-0003-2631-4262"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yingbin Liang","raw_affiliation_strings":["Dept. of ECE, The Ohio State University, Columbus, OH, 43210, USA","Department of ECE, The Ohio State University, Columbus, OH 43210, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept. of ECE, The Ohio State University, Columbus, OH, 43210, USA","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"Department of ECE, The Ohio State University, Columbus, OH 43210, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1972","last_page":"1976"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.7740190625190735},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6613715887069702},{"id":"https://openalex.org/keywords/empirical-risk-minimization","display_name":"Empirical risk minimization","score":0.6570900678634644},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.6141352653503418},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6060274243354797},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5464486479759216},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5325511693954468},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.501478910446167},{"id":"https://openalex.org/keywords/convexity","display_name":"Convexity","score":0.4913603365421295},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.43427830934524536},{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.4330371618270874},{"id":"https://openalex.org/keywords/feedforward-neural-network","display_name":"Feedforward neural network","score":0.42466944456100464},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41642695665359497},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.35314083099365234},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.31113022565841675}],"concepts":[{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.7740190625190735},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6613715887069702},{"id":"https://openalex.org/C107321475","wikidata":"https://www.wikidata.org/wiki/Q5374254","display_name":"Empirical risk minimization","level":2,"score":0.6570900678634644},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.6141352653503418},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6060274243354797},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5464486479759216},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5325511693954468},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.501478910446167},{"id":"https://openalex.org/C72134830","wikidata":"https://www.wikidata.org/wiki/Q5166524","display_name":"Convexity","level":2,"score":0.4913603365421295},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.43427830934524536},{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.4330371618270874},{"id":"https://openalex.org/C47702885","wikidata":"https://www.wikidata.org/wiki/Q5441227","display_name":"Feedforward neural network","level":3,"score":0.42466944456100464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41642695665359497},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.35314083099365234},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.31113022565841675},{"id":"https://openalex.org/C106159729","wikidata":"https://www.wikidata.org/wiki/Q2294553","display_name":"Financial economics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isit.2019.8849289","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit.2019.8849289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W2591714514","https://openalex.org/W2608609325","https://openalex.org/W2618398196","https://openalex.org/W2625063094","https://openalex.org/W2758053331","https://openalex.org/W2765701314","https://openalex.org/W2777256551","https://openalex.org/W2785631679","https://openalex.org/W2788305524","https://openalex.org/W2789138075","https://openalex.org/W2808958252","https://openalex.org/W2890963110","https://openalex.org/W2899748887","https://openalex.org/W2899790086","https://openalex.org/W2900959181","https://openalex.org/W2952318479","https://openalex.org/W2962698540","https://openalex.org/W2962704746","https://openalex.org/W2962767131","https://openalex.org/W2963122491","https://openalex.org/W2963211922","https://openalex.org/W2963326517","https://openalex.org/W2963417959","https://openalex.org/W2963427613","https://openalex.org/W2963519230","https://openalex.org/W2963569411","https://openalex.org/W2963672337","https://openalex.org/W2963827833","https://openalex.org/W2964256664","https://openalex.org/W4293717613","https://openalex.org/W4295257237","https://openalex.org/W4298857857","https://openalex.org/W4302027967","https://openalex.org/W6690388216","https://openalex.org/W6733941281","https://openalex.org/W6736786397","https://openalex.org/W6737871451","https://openalex.org/W6738373677","https://openalex.org/W6739166439","https://openalex.org/W6741721318","https://openalex.org/W6744879188","https://openalex.org/W6745403758","https://openalex.org/W6745448519","https://openalex.org/W6746713790","https://openalex.org/W6748256991","https://openalex.org/W6748266487","https://openalex.org/W6748269070","https://openalex.org/W6748519251","https://openalex.org/W6748770720","https://openalex.org/W6752544856","https://openalex.org/W6754361328","https://openalex.org/W6756001544","https://openalex.org/W6756137178"],"related_works":["https://openalex.org/W1964872188","https://openalex.org/W2187391117","https://openalex.org/W1981809986","https://openalex.org/W2952817981","https://openalex.org/W2808958252","https://openalex.org/W2365402722","https://openalex.org/W2975491454","https://openalex.org/W2786807178","https://openalex.org/W2963564348","https://openalex.org/W2914605961"],"abstract_inverted_index":{"We":[0,33,49],"study":[1],"model":[2],"recovery":[3],"for":[4,97,107],"data":[5],"classification,":[6],"where":[7],"the":[8,22,27,30,38,43,55,73,79,93,113,122],"training":[9],"labels":[10],"are":[11],"generated":[12],"from":[13],"a":[14,69,132],"one-hidden-layer":[15,109],"neural":[16,31,46,110],"network":[17,36,40,47,123],"with":[18,52,119],"sigmoid":[19],"activations,":[20],"and":[21,42,65,116],"goal":[23],"is":[24,82],"to":[25,121],"recover":[26],"weights":[28],"of":[29,72,135],"network.":[32],"consider":[34],"two":[35],"models,":[37],"fully-connected":[39],"(FCN)":[41],"non-overlapping":[44],"convolutional":[45],"(CNN).":[48],"prove":[50],"that":[51],"Gaussian":[53],"inputs,":[54],"empirical":[56,98],"risk":[57,99],"based":[58],"on":[59],"cross":[60,102],"entropy":[61,103],"exhibits":[62],"strong":[63],"convexity":[64],"smoothness":[66],"uniformly":[67],"in":[68,88],"local":[70,94],"neighborhood":[71],"ground":[74],"truth,":[75],"as":[76,78,130],"soon":[77],"sample":[80,115],"complexity":[81,118],"sufficiently":[83],"large.":[84],"Hence,":[85],"if":[86],"initialized":[87],"this":[89],"neighborhood,":[90],"it":[91],"establishes":[92],"convergence":[95],"guarantee":[96],"minimization":[100],"using":[101],"via":[104],"gradient":[105],"descent":[106],"learning":[108],"networks,":[111],"at":[112,137],"near-optimal":[114],"computational":[117],"respect":[120],"input":[124],"dimension":[125],"without":[126],"unrealistic":[127],"assumptions":[128],"such":[129],"requiring":[131],"fresh":[133],"set":[134],"samples":[136],"each":[138],"iteration.":[139]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2025-10-10T00:00:00"}