{"id":"https://openalex.org/W4413376533","doi":"https://doi.org/10.3390/jimaging11080281","title":"Deep Spectrogram Learning for Gunshot Classification: A Comparative Study of CNN Architectures and Time-Frequency Representations","display_name":"Deep Spectrogram Learning for Gunshot Classification: A Comparative Study of CNN Architectures and Time-Frequency Representations","publication_year":2025,"publication_date":"2025-08-21","ids":{"openalex":"https://openalex.org/W4413376533","doi":"https://doi.org/10.3390/jimaging11080281","pmid":"https://pubmed.ncbi.nlm.nih.gov/40863491"},"language":"en","primary_location":{"id":"doi:10.3390/jimaging11080281","is_oa":true,"landing_page_url":"https://doi.org/10.3390/jimaging11080281","pdf_url":"https://www.mdpi.com/2313-433X/11/8/281/pdf?version=1755769522","source":{"id":"https://openalex.org/S2736465063","display_name":"Journal of Imaging","issn_l":"2313-433X","issn":["2313-433X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Imaging","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2313-433X/11/8/281/pdf?version=1755769522","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085053950","display_name":"Pafan Doungpaisan","orcid":"https://orcid.org/0000-0002-8969-2984"},"institutions":[{"id":"https://openalex.org/I82828225","display_name":"King Mongkut's University of Technology North Bangkok","ror":"https://ror.org/04fy6jb97","country_code":"TH","type":"education","lineage":["https://openalex.org/I82828225"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Pafan Doungpaisan","raw_affiliation_strings":["Faculty of Industrial Technology and Management, King Mongkut\u2019s University of Technology North Bangkok, Bangkok 10800, Thailand","Faculty of Industrial Technology and Management, King Mongkut's University of Technology North Bangkok, Bangkok 10800, Thailand"],"raw_orcid":"https://orcid.org/0000-0002-8969-2984","affiliations":[{"raw_affiliation_string":"Faculty of Industrial Technology and Management, King Mongkut\u2019s University of Technology North Bangkok, Bangkok 10800, Thailand","institution_ids":["https://openalex.org/I82828225"]},{"raw_affiliation_string":"Faculty of Industrial Technology and Management, King Mongkut's University of Technology North Bangkok, Bangkok 10800, Thailand","institution_ids":["https://openalex.org/I82828225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063844057","display_name":"Peerapol Khunarsa","orcid":"https://orcid.org/0000-0002-4252-078X"},"institutions":[{"id":"https://openalex.org/I176205391","display_name":"Uttaradit Rajabhat University","ror":"https://ror.org/01rs03g07","country_code":"TH","type":"education","lineage":["https://openalex.org/I176205391"]}],"countries":["TH"],"is_corresponding":true,"raw_author_name":"Peerapol Khunarsa","raw_affiliation_strings":["Faculty of Science and Technology, Uttaradit Rajabhat University, Uttaradit 53000, Thailand"],"raw_orcid":"https://orcid.org/0000-0002-4252-078X","affiliations":[{"raw_affiliation_string":"Faculty of Science and Technology, Uttaradit Rajabhat University, Uttaradit 53000, Thailand","institution_ids":["https://openalex.org/I176205391"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5063844057"],"corresponding_institution_ids":["https://openalex.org/I176205391"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":7.7074,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.9706495,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"11","issue":"8","first_page":"281","last_page":"281"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13192","display_name":"Forensic Fingerprint Detection Methods","score":0.9811999797821045,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9259424209594727},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7927473783493042},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7882936596870422},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6468733549118042},{"id":"https://openalex.org/keywords/short-time-fourier-transform","display_name":"Short-time Fourier transform","score":0.6220147609710693},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5895229578018188},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.5134373903274536},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4837241470813751},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4375463128089905},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.41158705949783325},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4053448438644409},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3884938657283783},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.16801053285598755}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9259424209594727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7927473783493042},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7882936596870422},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6468733549118042},{"id":"https://openalex.org/C166386157","wikidata":"https://www.wikidata.org/wiki/Q1477735","display_name":"Short-time Fourier transform","level":4,"score":0.6220147609710693},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5895229578018188},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.5134373903274536},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4837241470813751},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4375463128089905},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.41158705949783325},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4053448438644409},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3884938657283783},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.16801053285598755},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/jimaging11080281","is_oa":true,"landing_page_url":"https://doi.org/10.3390/jimaging11080281","pdf_url":"https://www.mdpi.com/2313-433X/11/8/281/pdf?version=1755769522","source":{"id":"https://openalex.org/S2736465063","display_name":"Journal of Imaging","issn_l":"2313-433X","issn":["2313-433X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Imaging","raw_type":"journal-article"},{"id":"pmid:40863491","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40863491","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of imaging","raw_type":null},{"id":"pmh:oai:doaj.org/article:9c6b0c4a08214db2851be55dfcc5934f","is_oa":true,"landing_page_url":"https://doaj.org/article/9c6b0c4a08214db2851be55dfcc5934f","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Imaging, Vol 11, Iss 8, p 281 (2025)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:11195653","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12387842","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/jimaging11080281","is_oa":true,"landing_page_url":"https://doi.org/10.3390/jimaging11080281","pdf_url":"https://www.mdpi.com/2313-433X/11/8/281/pdf?version=1755769522","source":{"id":"https://openalex.org/S2736465063","display_name":"Journal of Imaging","issn_l":"2313-433X","issn":["2313-433X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Imaging","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413376533.pdf","grobid_xml":"https://content.openalex.org/works/W4413376533.grobid-xml"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W2097117768","https://openalex.org/W2565639579","https://openalex.org/W2733840449","https://openalex.org/W2784050770","https://openalex.org/W2797797964","https://openalex.org/W2801828415","https://openalex.org/W2884821113","https://openalex.org/W2949824473","https://openalex.org/W2964350391","https://openalex.org/W3017228718","https://openalex.org/W3020108204","https://openalex.org/W3023261343","https://openalex.org/W3033243763","https://openalex.org/W3043920615","https://openalex.org/W3091788134","https://openalex.org/W3092106208","https://openalex.org/W3102695566","https://openalex.org/W3112528637","https://openalex.org/W3133941734","https://openalex.org/W3135448569","https://openalex.org/W3158563192","https://openalex.org/W3159590571","https://openalex.org/W3176892709","https://openalex.org/W3189415251","https://openalex.org/W3195009539","https://openalex.org/W3201570765","https://openalex.org/W3203431461","https://openalex.org/W3205740033","https://openalex.org/W3212807787","https://openalex.org/W3216214094","https://openalex.org/W4223961890","https://openalex.org/W4282927465","https://openalex.org/W4286587846","https://openalex.org/W4288453303","https://openalex.org/W4296295784","https://openalex.org/W4303448478","https://openalex.org/W4308086473","https://openalex.org/W4309212884","https://openalex.org/W4320723550","https://openalex.org/W4360956127","https://openalex.org/W4378906283","https://openalex.org/W4386825037","https://openalex.org/W4386918931","https://openalex.org/W4388697156","https://openalex.org/W4391250017","https://openalex.org/W4392908103","https://openalex.org/W4395027907","https://openalex.org/W4400065444","https://openalex.org/W4401597485","https://openalex.org/W4402187355","https://openalex.org/W4402187402","https://openalex.org/W4402594237","https://openalex.org/W4403182579","https://openalex.org/W4403211873","https://openalex.org/W4404035327","https://openalex.org/W6841089370","https://openalex.org/W6845686123"],"related_works":["https://openalex.org/W2120540196","https://openalex.org/W3095343173","https://openalex.org/W2381036744","https://openalex.org/W2288135719","https://openalex.org/W2323749021","https://openalex.org/W2533590149","https://openalex.org/W2901989338","https://openalex.org/W200102888","https://openalex.org/W82005754","https://openalex.org/W2334448276"],"abstract_inverted_index":{"Gunshot":[0],"sound":[1,183],"classification":[2,134],"plays":[3],"a":[4,62,177],"crucial":[5],"role":[6],"in":[7,25,61],"public":[8],"safety,":[9],"forensic":[10],"investigations,":[11],"and":[12,47,94,115,128,146],"intelligent":[13],"surveillance":[14],"systems.":[15],"This":[16,86],"study":[17],"evaluates":[18],"the":[19,88,161],"performance":[20],"of":[21,52,90,96,163],"deep":[22,141,169],"learning":[23],"models":[24,170],"classifying":[26],"firearm":[27,58,182],"sounds":[28],"by":[29],"analyzing":[30],"twelve":[31],"time-frequency":[32,153],"spectrogram":[33,120],"representations,":[34],"including":[35],"Mel,":[36],"Bark,":[37],"MFCC,":[38],"CQT,":[39,126],"Cochleagram,":[40,127],"STFT,":[41],"FFT,":[42],"Reassigned,":[43],"Chroma,":[44],"Spectral":[45],"Contrast,":[46],"Wavelet.":[48],"The":[49],"dataset":[50],"consists":[51],"2148":[53],"gunshot":[54],"recordings":[55],"from":[56],"four":[57],"types,":[59],"collected":[60],"semi-controlled":[63],"outdoor":[64],"environment":[65],"under":[66],"multi-orientation":[67],"conditions.":[68],"To":[69],"leverage":[70],"advanced":[71],"computer":[72],"vision":[73],"techniques,":[74],"all":[75],"spectrograms":[76,130],"were":[77],"converted":[78],"into":[79,155],"RGB":[80,156],"images":[81,157],"using":[82],"perceptually":[83],"informed":[84],"colormaps.":[85],"enabled":[87],"application":[89],"image":[91,106],"processing":[92,165],"approaches":[93],"fine-tuning":[95],"pre-trained":[97],"Convolutional":[98],"Neural":[99],"Networks":[100],"(CNNs)":[101],"originally":[102],"developed":[103],"for":[104,180],"natural":[105],"classification.":[107,184],"Six":[108],"CNN":[109],"architectures-ResNet18,":[110],"ResNet50,":[111],"ResNet101,":[112],"GoogLeNet,":[113],"Inception-v3,":[114],"InceptionResNetV2-were":[116],"trained":[117],"on":[118],"these":[119],"images.":[121],"Experimental":[122],"results":[123],"indicate":[124],"that":[125,151],"Mel":[129],"consistently":[131],"achieved":[132],"high":[133],"accuracy,":[135],"exceeding":[136],"94%":[137],"when":[138],"paired":[139],"with":[140],"CNNs":[142],"such":[143],"as":[144],"ResNet101":[145],"InceptionResNetV2.":[147],"These":[148],"findings":[149],"demonstrate":[150],"transforming":[152],"features":[154],"not":[158],"only":[159],"facilitates":[160],"use":[162],"image-based":[164],"but":[166],"also":[167],"allows":[168],"to":[171],"capture":[172],"rich":[173],"spectral-temporal":[174],"patterns,":[175],"providing":[176],"robust":[178],"framework":[179],"accurate":[181]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
