{"id":"https://openalex.org/W1965073280","doi":"https://doi.org/10.1145/2043612.2043615","title":"Beat space segmentation and octave scale cepstral feature for sung language recognition in pop music","display_name":"Beat space segmentation and octave scale cepstral feature for sung language recognition in pop music","publication_year":2011,"publication_date":"2011-11-01","ids":{"openalex":"https://openalex.org/W1965073280","doi":"https://doi.org/10.1145/2043612.2043615","mag":"1965073280"},"language":"en","primary_location":{"id":"doi:10.1145/2043612.2043615","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2043612.2043615","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061102347","display_name":"Namunu C. Maddage","orcid":null},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Namunu C. Maddage","raw_affiliation_strings":["Royal Melbourne Institute of Technology University (RMIT), Melbourne, Australia","Royal Melbourne Institute of Technology University (RMIT), Melbourne, Australia#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Royal Melbourne Institute of Technology University (RMIT), Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]},{"raw_affiliation_string":"Royal Melbourne Institute of Technology University (RMIT), Melbourne, Australia#TAB#","institution_ids":["https://openalex.org/I82951845"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Institute for Infocomm Research (I2R), Singapore","Institute for InfoComm Research (I2R), Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), Singapore","institution_ids":["https://openalex.org/I3005327000"]},{"raw_affiliation_string":"Institute for InfoComm Research (I2R), Singapore","institution_ids":["https://openalex.org/I3005327000"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5061102347"],"corresponding_institution_ids":["https://openalex.org/I82951845"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04952431,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"4","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.7410272359848022},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7342579960823059},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7010097503662109},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6690286993980408},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.5203304290771484},{"id":"https://openalex.org/keywords/octave","display_name":"Octave (electronics)","score":0.49823737144470215},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49191975593566895},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4657372534275055},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.4472118020057678},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.44448283314704895},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.43703073263168335},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43118369579315186},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.1607404351234436},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07072135806083679}],"concepts":[{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.7410272359848022},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7342579960823059},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7010097503662109},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6690286993980408},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.5203304290771484},{"id":"https://openalex.org/C85841341","wikidata":"https://www.wikidata.org/wiki/Q1135984","display_name":"Octave (electronics)","level":2,"score":0.49823737144470215},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49191975593566895},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4657372534275055},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.4472118020057678},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.44448283314704895},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.43703073263168335},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43118369579315186},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1607404351234436},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07072135806083679},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2043612.2043615","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2043612.2043615","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},{"id":"pmh:oai:alma.61RMIT_INST:11246582010001341","is_oa":false,"landing_page_url":"http://doi.org/10.1145/2043612.2043615","pdf_url":null,"source":{"id":"https://openalex.org/S4306402074","display_name":"RMIT Research Repository (RMIT University Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I82951845","host_organization_name":"RMIT University","host_organization_lineage":["https://openalex.org/I82951845"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:figshare.com:article/27449295","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/27449295","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W99528949","https://openalex.org/W99956014","https://openalex.org/W110761666","https://openalex.org/W118378156","https://openalex.org/W126852431","https://openalex.org/W131170453","https://openalex.org/W135838486","https://openalex.org/W1482650115","https://openalex.org/W1488564527","https://openalex.org/W1492190885","https://openalex.org/W1493163583","https://openalex.org/W1496437476","https://openalex.org/W1530092271","https://openalex.org/W1554104313","https://openalex.org/W1560013842","https://openalex.org/W1800115528","https://openalex.org/W1973706390","https://openalex.org/W1989859754","https://openalex.org/W1993482042","https://openalex.org/W1994957142","https://openalex.org/W1996135305","https://openalex.org/W2026662327","https://openalex.org/W2029776772","https://openalex.org/W2042105302","https://openalex.org/W2042487788","https://openalex.org/W2045801524","https://openalex.org/W2067063915","https://openalex.org/W2088867696","https://openalex.org/W2095282534","https://openalex.org/W2095833672","https://openalex.org/W2097685957","https://openalex.org/W2101151533","https://openalex.org/W2103869314","https://openalex.org/W2106284094","https://openalex.org/W2114640443","https://openalex.org/W2125324924","https://openalex.org/W2125838338","https://openalex.org/W2131913731","https://openalex.org/W2142575067","https://openalex.org/W2143321035","https://openalex.org/W2144649408","https://openalex.org/W2146194791","https://openalex.org/W2148600927","https://openalex.org/W2151785968","https://openalex.org/W2154473523","https://openalex.org/W2161723943","https://openalex.org/W2165880886","https://openalex.org/W2172287020","https://openalex.org/W2603415636","https://openalex.org/W3143835353","https://openalex.org/W4243535721","https://openalex.org/W4243713509","https://openalex.org/W6604047900","https://openalex.org/W6605595087","https://openalex.org/W6624852173","https://openalex.org/W6633146083"],"related_works":["https://openalex.org/W2046295345","https://openalex.org/W2148609665","https://openalex.org/W2350343535","https://openalex.org/W2021671070","https://openalex.org/W2388195976","https://openalex.org/W2124093511","https://openalex.org/W2988724699","https://openalex.org/W2383152411","https://openalex.org/W1780863593","https://openalex.org/W2970933904"],"abstract_inverted_index":{"Sung":[0],"language":[1,43,115,144],"recognition":[2,116,145],"relies":[3],"on":[4,120,157],"both":[5],"effective":[6],"feature":[7,138],"extraction":[8],"and":[9,95,109,132],"acoustic":[10,49,73,85,159],"modeling.":[11],"In":[12],"this":[13],"paper,":[14],"we":[15,100],"study":[16,66],"rhythm":[17,54,163],"based":[18,55,164],"music":[19,165],"segmentation":[20,40,56],"with":[21,82],"the":[22,26,29,33,53,67,83,102,136],"frame":[23],"size":[24],"being":[25],"duration":[27],"of":[28,69,105,123,147],"smallest":[30],"note":[31],"in":[32,41,113,128],"music,":[34],"as":[35],"opposed":[36],"to":[37],"fixed":[38,61],"length":[39,62],"spoken":[42],"recognition.":[44],"It":[45],"is":[46],"found":[47],"that":[48,135],"features":[50],"extracted":[51,161],"from":[52,60],"scheme":[57],"outperform":[58],"those":[59],"segmentation.":[63,166],"We":[64],"also":[65],"effectiveness":[68],"a":[70,121],"musically":[71],"motivated":[72],"feature.":[74],"Octave":[75],"scale":[76],"cepstral":[77,89,97],"coefficients":[78,93],"(OSCCs)":[79],"by":[80],"comparing":[81],"other":[84,140],"features:":[86],"Log":[87],"frequency":[88],"coefficients,":[90],"Linear":[91],"prediction":[92],"(LPC)":[94],"LPC-derived":[96],"coefficients.":[98],"Finally,":[99],"examine":[101],"modeling":[103],"capabilities":[104],"Gaussian":[106,152],"mixture":[107,153],"models":[108,154],"support":[110],"vector":[111],"machines":[112],"sung":[114,127,143],"experiments.":[117],"Experiments":[118],"conducted":[119],"corpus":[122],"400":[124],"popular":[125],"songs":[126],"English,":[129],"Chinese,":[130],"German,":[131],"Indonesian,":[133],"showed":[134],"OSCC":[137],"outperforms":[139],"features.":[141],"A":[142],"accuracy":[146],"64.9%":[148],"was":[149],"achieved":[150],"when":[151],"were":[155],"trained":[156],"shifted-delta-OSCC":[158],"features,":[160],"via":[162]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
