{"id":"https://openalex.org/W4406785368","doi":"https://doi.org/10.1137/25m1726339","title":"Exploring Variance Reduction in Importance Sampling for Efficient DNN Training","display_name":"Exploring Variance Reduction in Importance Sampling for Efficient DNN Training","publication_year":2025,"publication_date":"2025-11-18","ids":{"openalex":"https://openalex.org/W4406785368","doi":"https://doi.org/10.1137/25m1726339"},"language":"en","primary_location":{"id":"doi:10.1137/25m1726339","is_oa":true,"landing_page_url":"https://doi.org/10.1137/25m1726339","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1137/25m1726339","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012936801","display_name":"Takuro Kutsuna","orcid":"https://orcid.org/0000-0001-6965-1512"},"institutions":[{"id":"https://openalex.org/I4210165351","display_name":"Toyota Central Research and Development Laboratories (Japan)","ror":"https://ror.org/05mjgqe69","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210125472","https://openalex.org/I4210165351"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takuro Kutsuna","raw_affiliation_strings":["Toyota Central R&D Labs., Inc., Aichi, 480-1192 Japan"],"raw_orcid":"https://orcid.org/0000-0001-6965-1512","affiliations":[{"raw_affiliation_string":"Toyota Central R&D Labs., Inc., Aichi, 480-1192 Japan","institution_ids":["https://openalex.org/I4210165351"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5012936801"],"corresponding_institution_ids":["https://openalex.org/I4210165351"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00488502,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"4","first_page":"1828","last_page":"1856"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9524000287055969,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9524000287055969,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variance-reduction","display_name":"Variance reduction","score":0.7945293188095093},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.7203006744384766},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6184593439102173},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.6146047711372375},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5940221548080444},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5081846714019775},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.43510156869888306},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39299479126930237},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3894859850406647},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.31426095962524414},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.1571412980556488},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.13351058959960938},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10541808605194092},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.08333128690719604}],"concepts":[{"id":"https://openalex.org/C62644790","wikidata":"https://www.wikidata.org/wiki/Q3454689","display_name":"Variance reduction","level":3,"score":0.7945293188095093},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.7203006744384766},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6184593439102173},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.6146047711372375},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5940221548080444},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5081846714019775},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.43510156869888306},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39299479126930237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3894859850406647},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.31426095962524414},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.1571412980556488},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.13351058959960938},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10541808605194092},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.08333128690719604},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1137/25m1726339","is_oa":true,"landing_page_url":"https://doi.org/10.1137/25m1726339","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2501.13296","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.13296","pdf_url":"https://arxiv.org/pdf/2501.13296","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2501.13296","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2501.13296","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1137/25m1726339","is_oa":true,"landing_page_url":"https://doi.org/10.1137/25m1726339","pdf_url":null,"source":{"id":"https://openalex.org/S4210229561","display_name":"SIAM Journal on Mathematics of Data Science","issn_l":"2577-0187","issn":["2577-0187"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320508","host_organization_name":"Society for Industrial and Applied Mathematics","host_organization_lineage":["https://openalex.org/P4310320508"],"host_organization_lineage_names":["Society for Industrial and Applied Mathematics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"SIAM Journal on Mathematics of Data Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2003732947","https://openalex.org/W2147800946","https://openalex.org/W2033057584","https://openalex.org/W2032309405","https://openalex.org/W2271361270","https://openalex.org/W1513873506","https://openalex.org/W3123742938"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2169816622","https://openalex.org/W2298254442","https://openalex.org/W1997242758","https://openalex.org/W2169367269","https://openalex.org/W2951368390","https://openalex.org/W2291558465","https://openalex.org/W1981992409","https://openalex.org/W2067529449"],"abstract_inverted_index":{"Importance":[0],"sampling":[1,31,85],"is":[2,86],"widely":[3],"used":[4],"to":[5,29,35,70,79],"improve":[6],"the":[7,17,25,59,62,81,114],"efficiency":[8,82],"of":[9,19,83,97],"deep":[10],"neural":[11],"network":[12],"(DNN)":[13],"training":[14,49,121],"by":[15],"reducing":[16],"variance":[18,26,45],"gradient":[20,103],"estimators.":[21],"However,":[22],"efficiently":[23],"assessing":[24],"reduction":[27,46],"relative":[28],"uniform":[30],"remains":[32],"challenging":[33],"due":[34],"computational":[36,135],"overhead.":[37,136],"This":[38],"paper":[39,63],"proposes":[40,65],"a":[41],"method":[42],"for":[43,94],"estimating":[44],"during":[47],"DNN":[48],"using":[50],"only":[51],"minibatches":[52],"sampled":[53],"under":[54],"importance":[55,84,98],"sampling.":[56],"By":[57],"leveraging":[58],"proposed":[60,115],"method,":[61],"also":[64,87],"an":[66,92],"effective":[67],"minibatch":[68],"size":[69],"enable":[71],"automatic":[72],"learning":[73],"rate":[74],"adjustment.":[75],"An":[76],"absolute":[77],"metric":[78],"quantify":[80],"introduced":[88],"as":[89,91],"well":[90],"algorithm":[93,116],"real-time":[95],"estimation":[96],"scores":[99],"based":[100],"on":[101,109],"moving":[102],"statistics.":[104],"Theoretical":[105],"analysis":[106],"and":[107,123],"experiments":[108],"benchmark":[110],"datasets":[111],"demonstrated":[112],"that":[113],"consistently":[117],"reduces":[118],"variance,":[119],"improves":[120],"efficiency,":[122],"enhances":[124],"model":[125],"accuracy":[126],"compared":[127],"with":[128],"current":[129],"importance-sampling":[130],"approaches":[131],"while":[132],"maintaining":[133],"minimal":[134]},"counts_by_year":[],"updated_date":"2025-11-23T23:15:26.331081","created_date":"2025-01-25T00:00:00"}
