{"id":"https://openalex.org/W3182478434","doi":"https://doi.org/10.1109/taslp.2021.3095656","title":"Gamma Boltzmann Machine for Audio Modeling","display_name":"Gamma Boltzmann Machine for Audio Modeling","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3182478434","doi":"https://doi.org/10.1109/taslp.2021.3095656","mag":"3182478434"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3095656","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3095656","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09478208.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09478208.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041887072","display_name":"Toru Nakashika","orcid":"https://orcid.org/0000-0003-1863-6771"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Toru Nakashika","raw_affiliation_strings":["Graduate School of Informatics and Engineering, The University of Electro-Communications, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics and Engineering, The University of Electro-Communications, Tokyo, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034837951","display_name":"Kohei Yatabe","orcid":"https://orcid.org/0000-0002-1345-0663"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kohei Yatabe","raw_affiliation_strings":["Department of Intermedia Art and Science, Waseda University, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Intermedia Art and Science, Waseda University, Tokyo, Japan","institution_ids":["https://openalex.org/I150744194"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5041887072"],"corresponding_institution_ids":["https://openalex.org/I20529979"],"apc_list":null,"apc_paid":null,"fwci":0.7715,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70848659,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"29","issue":null,"first_page":"2591","last_page":"2605"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/restricted-boltzmann-machine","display_name":"Restricted Boltzmann machine","score":0.7679513692855835},{"id":"https://openalex.org/keywords/logarithm","display_name":"Logarithm","score":0.7346790432929993},{"id":"https://openalex.org/keywords/boltzmann-machine","display_name":"Boltzmann machine","score":0.6953295469284058},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.5803204774856567},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5772470831871033},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.45095646381378174},{"id":"https://openalex.org/keywords/gamma-distribution","display_name":"Gamma distribution","score":0.4344610571861267},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41487547755241394},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.41460996866226196},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4086986780166626},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3368455767631531},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27786415815353394},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10854166746139526}],"concepts":[{"id":"https://openalex.org/C199354608","wikidata":"https://www.wikidata.org/wiki/Q7316287","display_name":"Restricted Boltzmann machine","level":3,"score":0.7679513692855835},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.7346790432929993},{"id":"https://openalex.org/C192576344","wikidata":"https://www.wikidata.org/wiki/Q194706","display_name":"Boltzmann machine","level":3,"score":0.6953295469284058},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.5803204774856567},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5772470831871033},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.45095646381378174},{"id":"https://openalex.org/C149717495","wikidata":"https://www.wikidata.org/wiki/Q117806","display_name":"Gamma distribution","level":2,"score":0.4344610571861267},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41487547755241394},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.41460996866226196},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4086986780166626},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3368455767631531},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27786415815353394},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10854166746139526},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2021.3095656","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3095656","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09478208.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2021.3095656","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3095656","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09478208.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1069223013","display_name":null,"funder_award_id":"JSPS KAKENHI","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3459562248","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3942829656","display_name":"Modelling Speech Spectra Based on Logarithmic Shallow Neural Networks","funder_award_id":"21K11957","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4636223006","display_name":null,"funder_award_id":"JSPS KAK","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3182478434.pdf","grobid_xml":"https://content.openalex.org/works/W3182478434.grobid-xml"},"referenced_works_count":65,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1836465849","https://openalex.org/W1959608418","https://openalex.org/W1985090574","https://openalex.org/W2042492924","https://openalex.org/W2048708177","https://openalex.org/W2064082346","https://openalex.org/W2099471712","https://openalex.org/W2099866409","https://openalex.org/W2100495367","https://openalex.org/W2103636088","https://openalex.org/W2130389898","https://openalex.org/W2131062138","https://openalex.org/W2132037657","https://openalex.org/W2133257461","https://openalex.org/W2136922672","https://openalex.org/W2136936677","https://openalex.org/W2137782235","https://openalex.org/W2162520685","https://openalex.org/W2165225968","https://openalex.org/W2194775991","https://openalex.org/W2255418810","https://openalex.org/W2267126114","https://openalex.org/W2291877678","https://openalex.org/W2471520273","https://openalex.org/W2475998840","https://openalex.org/W2508627142","https://openalex.org/W2513927927","https://openalex.org/W2519091744","https://openalex.org/W2547523780","https://openalex.org/W2746654391","https://openalex.org/W2774389566","https://openalex.org/W2802304149","https://openalex.org/W2806925955","https://openalex.org/W2888892098","https://openalex.org/W2889205417","https://openalex.org/W2948211236","https://openalex.org/W2949382160","https://openalex.org/W2953318193","https://openalex.org/W2963071736","https://openalex.org/W2963090522","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2963828919","https://openalex.org/W2963971656","https://openalex.org/W2964052309","https://openalex.org/W2964121744","https://openalex.org/W2965390715","https://openalex.org/W2970006822","https://openalex.org/W3033703941","https://openalex.org/W3037326150","https://openalex.org/W3097450432","https://openalex.org/W4285719527","https://openalex.org/W4320013936","https://openalex.org/W6610566761","https://openalex.org/W6631190155","https://openalex.org/W6638667902","https://openalex.org/W6640963894","https://openalex.org/W6646555135","https://openalex.org/W6679718588","https://openalex.org/W6684349851","https://openalex.org/W6744040789","https://openalex.org/W6763100362","https://openalex.org/W6767111847","https://openalex.org/W6780421020"],"related_works":["https://openalex.org/W2892911634","https://openalex.org/W2461687944","https://openalex.org/W3044458868","https://openalex.org/W870929296","https://openalex.org/W2320963147","https://openalex.org/W3082895349","https://openalex.org/W2971852391","https://openalex.org/W3037326150","https://openalex.org/W3130931529","https://openalex.org/W3182478434"],"abstract_inverted_index":{"This":[0,143],"paper":[1],"presents":[2],"an":[3,136],"energy-based":[4],"probabilistic":[5],"model":[6,90],"that":[7,81,107,120,158],"handles":[8],"nonnegative":[9],"data":[10,123],"in":[11,44,54,96,147],"consideration":[12],"of":[13,23,32,56,78,197],"both":[14,129,141],"linear":[15,130],"and":[16,100,111,131,165,191],"logarithmic":[17,50,66,71,92,132],"scales.":[18,142],"In":[19],"audio":[20,46],"applications,":[21],"magnitude":[22],"time-frequency":[24],"representation,":[25],"including":[26],"spectrogram,":[27],"is":[28,52,73,115,159,170],"regarded":[29],"as":[30,161],"one":[31],"the":[33,59,76,91,152,162,167,181,195,198],"most":[34],"important":[35,53],"features.":[36],"Such":[37],"magnitude-based":[38],"features":[39,60,79],"have":[40,87],"been":[41],"extensively":[42],"utilized":[43],"learning-based":[45],"processing.":[47],"Since":[48],"a":[49,65,70,82,97,102,116,148],"scale":[51],"terms":[55],"auditory":[57],"perception,":[58],"are":[61],"usually":[62],"computed":[63],"with":[64],"function.":[67],"That":[68],"is,":[69],"function":[72,138,145],"applied":[74],"within":[75],"computation":[77],"so":[80],"learning":[83],"machine":[84,105],"does":[85],"not":[86],"to":[88,180],"explicitly":[89],"scale.":[93],"We":[94],"think":[95],"different":[98],"way":[99],"propose":[101],"restricted":[103],"Boltzmann":[104],"(RBM)":[106],"simultaneously":[108],"models":[109],"linear-":[110],"log-magnitude":[112],"spectra.":[113],"RBM":[114,169,177,184],"stochastic":[117],"neural":[118],"network":[119],"can":[121],"discover":[122],"representations":[124],"without":[125],"supervision.":[126],"To":[127],"manage":[128],"scales,":[133],"we":[134],"define":[135],"energy":[137,144],"based":[139],"on":[140],"results":[146],"conditional":[149],"distribution":[150],"(of":[151],"observable":[153],"data,":[154],"given":[155],"hidden":[156],"units)":[157],"written":[160],"gamma":[163],"distribution,":[164],"hence":[166],"proposed":[168,175,199],"termed":[171],"gamma-Bernoulli":[172,176],"RBM.":[173],"The":[174],"was":[178],"compared":[179],"ordinary":[182],"Gaussian-Bernoulli":[183],"by":[185],"speech":[186],"representation":[187],"experiments.":[188],"Both":[189],"objective":[190],"subjective":[192],"evaluations":[193],"illustrated":[194],"advantage":[196],"model.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
