{"id":"https://openalex.org/W4411099327","doi":"https://doi.org/10.1145/3723178.3723219","title":"Acoustic Analysis of Speech for Gender and Age Classification Using CNN and Machine Learning Techniques","display_name":"Acoustic Analysis of Speech for Gender and Age Classification Using CNN and Machine Learning Techniques","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4411099327","doi":"https://doi.org/10.1145/3723178.3723219"},"language":"en","primary_location":{"id":"doi:10.1145/3723178.3723219","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3723178.3723219","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723219","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Computing Advancements","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723219","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102243555","display_name":"Ahmed Shakib Reza","orcid":"https://orcid.org/0009-0002-9964-6495"},"institutions":[{"id":"https://openalex.org/I5518804","display_name":"BRAC University","ror":"https://ror.org/00sge8677","country_code":"BD","type":"education","lineage":["https://openalex.org/I5518804"]}],"countries":["BD"],"is_corresponding":true,"raw_author_name":"Ahmed Shakib Reza","raw_affiliation_strings":["Department of Computer Science and Engineering, BRAC University (BRACU), Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0009-0002-9964-6495","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, BRAC University (BRACU), Dhaka, Bangladesh","institution_ids":["https://openalex.org/I5518804"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021919229","display_name":"Md. Kishor Morol","orcid":"https://orcid.org/0000-0002-4468-8260"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Kishor Morol","raw_affiliation_strings":["Department of Computing and Information Science, Cornell University, New York, USA"],"raw_orcid":"https://orcid.org/0000-0002-4468-8260","affiliations":[{"raw_affiliation_string":"Department of Computing and Information Science, Cornell University, New York, USA","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091998586","display_name":"Shakib Sadat Shanto","orcid":"https://orcid.org/0009-0009-8798-9010"},"institutions":[{"id":"https://openalex.org/I103434671","display_name":"American International University-Bangladesh","ror":"https://ror.org/02j8ga255","country_code":"BD","type":"education","lineage":["https://openalex.org/I103434671"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Shakib Sadat Shanto","raw_affiliation_strings":["Department of Computer Science, American International University-Bangladesh (AIUB), Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0009-0009-8798-9010","affiliations":[{"raw_affiliation_string":"Department of Computer Science, American International University-Bangladesh (AIUB), Dhaka, Bangladesh","institution_ids":["https://openalex.org/I103434671"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061836585","display_name":"Zishan Ahmed","orcid":"https://orcid.org/0009-0004-9598-917X"},"institutions":[{"id":"https://openalex.org/I103434671","display_name":"American International University-Bangladesh","ror":"https://ror.org/02j8ga255","country_code":"BD","type":"education","lineage":["https://openalex.org/I103434671"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Zishan Ahmed","raw_affiliation_strings":["Department of Computer Science, American International University-Bangladesh (AIUB), Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0009-0004-9598-917X","affiliations":[{"raw_affiliation_string":"Department of Computer Science, American International University-Bangladesh (AIUB), Dhaka, Bangladesh","institution_ids":["https://openalex.org/I103434671"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013085859","display_name":"Md. Abdullah-Al-Jubair","orcid":"https://orcid.org/0000-0001-6649-4435"},"institutions":[{"id":"https://openalex.org/I103434671","display_name":"American International University-Bangladesh","ror":"https://ror.org/02j8ga255","country_code":"BD","type":"education","lineage":["https://openalex.org/I103434671"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Md Abdullah Al Jubair","raw_affiliation_strings":["Department of Computer Science, American International University - Bangladesh (AIUB), Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0001-6649-4435","affiliations":[{"raw_affiliation_string":"Department of Computer Science, American International University - Bangladesh (AIUB), Dhaka, Bangladesh","institution_ids":["https://openalex.org/I103434671"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102243555"],"corresponding_institution_ids":["https://openalex.org/I5518804"],"apc_list":null,"apc_paid":null,"fwci":0.6623,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.77600575,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"311","last_page":"318"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7045878767967224},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6250914931297302},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4886946976184845},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3420940041542053},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33649009466171265}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7045878767967224},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6250914931297302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4886946976184845},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3420940041542053},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33649009466171265}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3723178.3723219","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3723178.3723219","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723219","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Computing Advancements","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3723178.3723219","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3723178.3723219","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723219","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Computing Advancements","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Gender equality","id":"https://metadata.un.org/sdg/5","score":0.4699999988079071},{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411099327.pdf","grobid_xml":"https://content.openalex.org/works/W4411099327.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W247701703","https://openalex.org/W2477254990","https://openalex.org/W2534261280","https://openalex.org/W2594820332","https://openalex.org/W2800861965","https://openalex.org/W2901593510","https://openalex.org/W2903072245","https://openalex.org/W2982641643","https://openalex.org/W3002921525","https://openalex.org/W3011146400","https://openalex.org/W3153231713","https://openalex.org/W3185317940","https://openalex.org/W4286571608","https://openalex.org/W4308654587","https://openalex.org/W4313263179","https://openalex.org/W4379875296","https://openalex.org/W4388441063","https://openalex.org/W4389138951"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Age":[0],"and":[1,15,26,42,47,59,122,141,147,176,192],"gender":[2,179],"classification":[3,175,206],"from":[4,76,104,188],"voice":[5,78,157],"recordings":[6],"has":[7],"significant":[8],"applications":[9],"in":[10,19,39,144],"personalized":[11],"marketing,":[12],"human-computer":[13],"interaction,":[14],"speech":[16,106,145],"processing.Recent":[17],"advancements":[18],"this":[20],"field":[21],"primarily":[22],"employ":[23],"neural":[24,69],"networks":[25],"traditional":[27,82,164],"machine":[28,165],"learning":[29,102,151,166],"techniques":[30],"to":[31,99,183],"enhance":[32,100],"prediction":[33],"accuracy.However,":[34],"several":[35],"gaps":[36],"remain,":[37],"particularly":[38],"the":[40,81,91,105,108,123,126],"robustness":[41],"generalizability":[43,194],"across":[44,119,195],"diverse":[45],"languages":[46,197],"datasets,":[48],"as":[49],"many":[50],"existing":[51],"models":[52],"rely":[53],"heavily":[54],"on":[55,84],"manual":[56,85],"feature":[57,86,101],"engineering":[58],"specific":[60],"data":[61,146,191],"preprocessing":[62],"steps.This":[63],"study":[64],"proposes":[65],"a":[66,111,130,200],"novel":[67],"convolutional":[68],"network":[70],"(CNN)":[71],"architecture":[72,137],"that":[73,153],"directly":[74,103,154,187],"learns":[75],"raw":[77,156,189],"data,":[79],"bypassing":[80],"reliance":[83],"engineering.The":[87],"main":[88],"contributions":[89],"include":[90],"introduction":[92],"of":[93,110,116,125,133,171],"an":[94,149,169],"advanced":[95],"CNN":[96,136,160],"model":[97,128,161],"designed":[98],"waveform,":[107],"implementation":[109],"more":[112],"generalized":[113],"system":[114],"capable":[115],"operating":[117],"effectively":[118],"different":[120,196],"languages,":[121],"validation":[124],"proposed":[127,135,159],"against":[129],"comprehensive":[131],"set":[132],"metrics.The":[134],"captures":[138],"both":[139],"local":[140],"global":[142],"dependencies":[143],"employs":[148],"end-to-end":[150],"approach":[152],"processes":[155],"signals.The":[158],"consistently":[162],"outperformed":[163],"approaches,":[167],"achieving":[168],"accuracy":[170],"88.1%":[172],"for":[173,178,203],"age":[174],"96.9%":[177],"classification.The":[180],"model's":[181],"ability":[182],"learn":[184],"discriminative":[185],"features":[186],"audio":[190],"its":[193],"make":[198],"it":[199],"promising":[201],"solution":[202],"voice-based":[204],"demographic":[205],"tasks.":[207]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
