{"id":"https://openalex.org/W4403209943","doi":"https://doi.org/10.1109/ojcs.2024.3476416","title":"MusicTalk: A Microservice Approach for Musical Instrument Recognition","display_name":"MusicTalk: A Microservice Approach for Musical Instrument Recognition","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4403209943","doi":"https://doi.org/10.1109/ojcs.2024.3476416"},"language":"en","primary_location":{"id":"doi:10.1109/ojcs.2024.3476416","is_oa":true,"landing_page_url":"https://doi.org/10.1109/ojcs.2024.3476416","pdf_url":null,"source":{"id":"https://openalex.org/S4210176459","display_name":"IEEE Open Journal of the Computer Society","issn_l":"2644-1268","issn":["2644-1268"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Open Journal of the Computer Society","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/ojcs.2024.3476416","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041723502","display_name":"Yi\u2010Bing Lin","orcid":"https://orcid.org/0000-0001-6841-4718"},"institutions":[{"id":"https://openalex.org/I184693016","display_name":"China Medical University","ror":"https://ror.org/00v408z34","country_code":"TW","type":"education","lineage":["https://openalex.org/I184693016"]},{"id":"https://openalex.org/I91656880","display_name":"China Medical University","ror":"https://ror.org/032d4f246","country_code":"CN","type":"education","lineage":["https://openalex.org/I91656880"]}],"countries":["CN","TW"],"is_corresponding":true,"raw_author_name":"Yi-Bing Lin","raw_affiliation_strings":["Department of Biomedical Informatics, China Medical University, TaiChung City, Taiwan","China Medical University, Miin Wu School of Computing, National Cheng Kung University"],"raw_orcid":"https://orcid.org/0000-0001-6841-4718","affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, China Medical University, TaiChung City, Taiwan","institution_ids":["https://openalex.org/I184693016"]},{"raw_affiliation_string":"China Medical University, Miin Wu School of Computing, National Cheng Kung University","institution_ids":["https://openalex.org/I91656880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030335757","display_name":"Chang\u2010Chieh Cheng","orcid":"https://orcid.org/0000-0002-9103-3400"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]},{"id":"https://openalex.org/I4210132233","display_name":"Ion Technology Center (Japan)","ror":"https://ror.org/02m8g1k56","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210132233"]}],"countries":["JP","TW"],"is_corresponding":false,"raw_author_name":"Chang-Chieh Cheng","raw_affiliation_strings":["Information Technology Service Center, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","Information Technology Service CenterNYCU"],"raw_orcid":"https://orcid.org/0000-0002-9103-3400","affiliations":[{"raw_affiliation_string":"Information Technology Service Center, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]},{"raw_affiliation_string":"Information Technology Service CenterNYCU","institution_ids":["https://openalex.org/I4210132233"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068816864","display_name":"Shih-Chuan Chiu","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Shih-Chuan Chiu","raw_affiliation_strings":["Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","Department of Computer ScienceNYCU"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]},{"raw_affiliation_string":"Department of Computer ScienceNYCU","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041723502"],"corresponding_institution_ids":["https://openalex.org/I184693016","https://openalex.org/I91656880"],"apc_list":{"value":1750,"currency":"USD","value_usd":1750},"apc_paid":{"value":1750,"currency":"USD","value_usd":1750},"fwci":0.6479,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.67585072,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"5","issue":null,"first_page":"612","last_page":"623"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/musical-instrument","display_name":"Musical instrument","score":0.6806471347808838},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.6369721293449402},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.48487934470176697},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36001718044281006},{"id":"https://openalex.org/keywords/visual-arts","display_name":"Visual arts","score":0.21105089783668518},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.17690855264663696},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.14051106572151184},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07156959176063538}],"concepts":[{"id":"https://openalex.org/C2983311337","wikidata":"https://www.wikidata.org/wiki/Q34379","display_name":"Musical instrument","level":2,"score":0.6806471347808838},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.6369721293449402},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48487934470176697},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36001718044281006},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.21105089783668518},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.17690855264663696},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.14051106572151184},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07156959176063538}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ojcs.2024.3476416","is_oa":true,"landing_page_url":"https://doi.org/10.1109/ojcs.2024.3476416","pdf_url":null,"source":{"id":"https://openalex.org/S4210176459","display_name":"IEEE Open Journal of the Computer Society","issn_l":"2644-1268","issn":["2644-1268"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Open Journal of the Computer Society","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:8fc864f6507c4ac1953df642f56db16e","is_oa":true,"landing_page_url":"https://doaj.org/article/8fc864f6507c4ac1953df642f56db16e","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Open Journal of the Computer Society, Vol 5, Pp 612-623 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/ojcs.2024.3476416","is_oa":true,"landing_page_url":"https://doi.org/10.1109/ojcs.2024.3476416","pdf_url":null,"source":{"id":"https://openalex.org/S4210176459","display_name":"IEEE Open Journal of the Computer Society","issn_l":"2644-1268","issn":["2644-1268"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Open Journal of the Computer Society","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322609","display_name":"China Medical University Hospital","ror":"https://ror.org/0368s4g32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2117352205","https://openalex.org/W2593116425","https://openalex.org/W2595054336","https://openalex.org/W2616247523","https://openalex.org/W2759171953","https://openalex.org/W2892844477","https://openalex.org/W2969414698","https://openalex.org/W3094550259","https://openalex.org/W3196974791","https://openalex.org/W3205475937","https://openalex.org/W4224277409","https://openalex.org/W4309352855","https://openalex.org/W4312894600","https://openalex.org/W4312916726","https://openalex.org/W4320712896","https://openalex.org/W4362613132","https://openalex.org/W4375869388","https://openalex.org/W4386736895","https://openalex.org/W4388240379","https://openalex.org/W4399951340","https://openalex.org/W6697040288","https://openalex.org/W6747336215","https://openalex.org/W6756597712","https://openalex.org/W6778572914","https://openalex.org/W6784333009","https://openalex.org/W6803752119"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2415999852","https://openalex.org/W2030575009","https://openalex.org/W2147114809","https://openalex.org/W4390012055","https://openalex.org/W2991141425","https://openalex.org/W2591743215","https://openalex.org/W7350038"],"abstract_inverted_index":{"Musical":[0],"instrument":[1,65,90],"recognition":[2,48],"is":[3,66,278],"the":[4,55,59,111,165,168,195,212,250,260],"process":[5],"of":[6,31,167,180,202,220,238,264,270],"using":[7,240],"machine":[8],"learning":[9],"or":[10],"audio":[11,23,72,141],"signal":[12],"processing":[13],"to":[14,57,120,176,280],"identify":[15],"and":[16,41,68,155,184,199,204,249],"classify":[17],"different":[18],"musical":[19,32,89],"instruments":[20],"from":[21],"an":[22,140],"recording.":[24],"This":[25,172],"capability":[26],"enables":[27,235],"more":[28],"precise":[29],"analysis":[30,160,173],"pieces,":[33],"aiding":[34],"in":[35,71,189,223],"tasks":[36],"like":[37],"transcription,":[38],"music":[39,146],"recommendation,":[40],"automated":[42],"composition.":[43],"The":[44,266],"challenges":[45],"include":[46],"(1)":[47],"models":[49],"not":[50],"being":[51],"accurate":[52],"enough,":[53],"(2)":[54],"need":[56],"retrain":[58],"entire":[60],"model":[61],"when":[62],"a":[63,86,101,244],"new":[64],"added,":[67],"(3)":[69],"differences":[70],"formats":[73],"that":[74,143,254],"prevent":[75],"direct":[76],"usage.":[77],"To":[78],"address":[79],"these":[80],"challenges,":[81],"this":[82,276],"article":[83],"introduces":[84,100],"MusicTalk,":[85,187],"microservice":[87],"based":[88],"(MI)":[91],"detection":[92,117,170,257],"system,":[93],"with":[94,134,182],"several":[95],"key":[96],"contributions.":[97],"Firstly,":[98],"MusicTalk":[99,124,138,221,234,271],"novel":[102],"patchout":[103,183],"mechanism":[104],"named":[105],"Brightness":[106],"Characteristic":[107],"Based":[108],"Patchout":[109],"for":[110,207,275],"ViT":[112,181],"algorithm,":[113],"which":[114,210],"enhances":[115,259],"MI":[116,127,169,230,256],"accuracy":[118,191,258],"compared":[119],"existing":[121],"solutions.":[122],"Secondly,":[123],"integrates":[125],"individual":[126],"detectors":[128,231],"as":[129,150,232],"microservices,":[130,233],"facilitating":[131],"efficient":[132],"interaction":[133],"other":[135],"microservices.":[136],"Thirdly,":[137],"incorporates":[139],"shaper":[142],"unifies":[144],"diverse":[145],"open":[147],"datasets":[148],"such":[149],"Audioset,":[151],"Openmic-2018,":[152],"MedleyDB,":[153],"URMP,":[154],"INSTDB.":[156],"By":[157,228],"employing":[158],"Grad-CAM":[159],"on":[161],"Mel-Spectrograms,":[162],"we":[163,252],"elucidate":[164],"characteristics":[166],"model.":[171],"allows":[174],"us":[175],"optimize":[177],"ensemble":[178],"combinations":[179],"CNNs":[185],"within":[186],"resulting":[188],"high":[190],"rates.":[192],"For":[193],"instance,":[194],"system":[196],"achieves":[197],"precision":[198],"recall":[200],"rates":[201],"96.17%":[203],"95.77%":[205],"respectively":[206],"violin":[208],"detection,":[209],"are":[211],"highest":[213],"among":[214],"previous":[215,273],"approaches.":[216],"An":[217],"additional":[218],"advantage":[219],"lies":[222],"its":[224],"microservice-driven":[225],"visualization":[226,237],"capabilities.":[227],"integrating":[229],"seamless":[236],"songs":[239],"animated":[241],"avatars.":[242],"In":[243],"case":[245],"study":[246],"featuring":[247],"\u201cPeter":[248],"Wolf,\u201d":[251],"demonstrate":[253],"improved":[255],"visual":[261],"storytelling":[262],"impact":[263],"music.":[265],"overall":[267],"F1-score":[268],"improvement":[269],"over":[272],"approaches":[274],"song":[277],"up":[279],"12%.":[281]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
