{"id":"https://openalex.org/W2108850708","doi":"https://doi.org/10.1109/iros.2008.4650821","title":"A robot listens to music and counts its beats aloud by separating music from counting voice","display_name":"A robot listens to music and counts its beats aloud by separating music from counting voice","publication_year":2008,"publication_date":"2008-09-01","ids":{"openalex":"https://openalex.org/W2108850708","doi":"https://doi.org/10.1109/iros.2008.4650821","mag":"2108850708"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2008.4650821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2008.4650821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE/RSJ International Conference on Intelligent Robots and Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073484126","display_name":"Teruhiro Mizumoto","orcid":"https://orcid.org/0000-0003-0281-1205"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"T. Mizumoto","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","Grad. Sch. of Inf., Kyoto-Univ., Kyoto"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]},{"raw_affiliation_string":"Grad. Sch. of Inf., Kyoto-Univ., Kyoto","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018403421","display_name":"Ryu Takeda","orcid":"https://orcid.org/0009-0007-0518-6245"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"R. Takeda","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","Grad. Sch. of Inf., Kyoto-Univ., Kyoto"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]},{"raw_affiliation_string":"Grad. Sch. of Inf., Kyoto-Univ., Kyoto","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067956319","display_name":"Kazuyoshi Yoshii","orcid":"https://orcid.org/0000-0001-8387-8609"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"K. Yoshii","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","Grad. Sch. of Inf., Kyoto-Univ., Kyoto"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]},{"raw_affiliation_string":"Grad. Sch. of Inf., Kyoto-Univ., Kyoto","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049614400","display_name":"Kazunori Komatani","orcid":"https://orcid.org/0000-0002-6052-600X"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"K. Komatani","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","Grad. Sch. of Inf., Kyoto-Univ., Kyoto"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]},{"raw_affiliation_string":"Grad. Sch. of Inf., Kyoto-Univ., Kyoto","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055922202","display_name":"Tetsuya Ogata","orcid":"https://orcid.org/0000-0001-7015-0379"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"T. Ogata","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","Grad. Sch. of Inf., Kyoto-Univ., Kyoto"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]},{"raw_affiliation_string":"Grad. Sch. of Inf., Kyoto-Univ., Kyoto","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005141184","display_name":"Hiroshi G. Okuno","orcid":"https://orcid.org/0000-0002-8704-4318"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"H.G. Okuno","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","Grad. Sch. of Inf., Kyoto-Univ., Kyoto"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Sakyo, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]},{"raw_affiliation_string":"Grad. Sch. of Inf., Kyoto-Univ., Kyoto","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5073484126"],"corresponding_institution_ids":["https://openalex.org/I22299242"],"apc_list":null,"apc_paid":null,"fwci":2.6588,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.90136808,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1538","last_page":"1543"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/beat","display_name":"Beat (acoustics)","score":0.7148393392562866},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6689885854721069},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6119487285614014},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4978320598602295},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.4256686568260193},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3372621536254883},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.28687888383865356},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.1707005798816681},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.13593623042106628}],"concepts":[{"id":"https://openalex.org/C189809214","wikidata":"https://www.wikidata.org/wiki/Q829522","display_name":"Beat (acoustics)","level":2,"score":0.7148393392562866},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6689885854721069},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6119487285614014},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4978320598602295},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.4256686568260193},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3372621536254883},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.28687888383865356},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.1707005798816681},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.13593623042106628},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iros.2008.4650821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2008.4650821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE/RSJ International Conference on Intelligent Robots and Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.141.6318","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.141.6318","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://winnie.kuis.kyoto-u.ac.jp/~okuno/paper/IROS08-Mizumoto.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1492221128","https://openalex.org/W1523943344","https://openalex.org/W1974052916","https://openalex.org/W1999542175","https://openalex.org/W2001771035","https://openalex.org/W2004415003","https://openalex.org/W2090462684","https://openalex.org/W2114640443","https://openalex.org/W2121788372","https://openalex.org/W2128456226","https://openalex.org/W2145482038","https://openalex.org/W2152205330","https://openalex.org/W2317483166","https://openalex.org/W6631462732","https://openalex.org/W6651343493"],"related_works":["https://openalex.org/W2317723112","https://openalex.org/W2475724061","https://openalex.org/W2773393136","https://openalex.org/W2174706483","https://openalex.org/W2997121352","https://openalex.org/W419536403","https://openalex.org/W2506280730","https://openalex.org/W4237969969","https://openalex.org/W1594297642","https://openalex.org/W2366328218"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,165,200],"beat-counting":[4,58],"robot":[5,121],"that":[6,34,122,191,206],"can":[7],"count":[8],"musical":[9,148],"beats":[10],"aloud,":[11],"i.e.,":[12],"speak":[13],"ldquoone,":[14],"two,":[15,19],"three,":[16],"four,":[17],"one,":[18],"...rdquo":[20],"along":[21],"music,":[22],"while":[23],"listening":[24],"to":[25,43,49,161],"music":[26,45,105,162,177,212],"by":[27,76,91],"using":[28],"its":[29],"own":[30,52,209],"ears.":[31,92],"Music-understanding":[32],"robots":[33],"interact":[35],"with":[36,127],"humans":[37],"should":[38],"be":[39],"able":[40],"not":[41],"only":[42],"recognize":[44],"internally,":[46],"but":[47],"also":[48],"express":[50],"their":[51],"internal":[53],"states.":[54],"To":[55,133],"develop":[56],"our":[57],"robot,":[59],"we":[60,115,137],"have":[61],"tackled":[62],"three":[63],"issues:":[64],"(1)":[65,142],"recognition":[66,112,213],"of":[67,73,82,101,109,125,130,157,173,195],"hierarchical":[68],"beat":[69,111,143],"structures,":[70],"(2)":[71,154],"expression":[72],"these":[74,135],"structures":[75],"counting":[77,83,102,158,179,197],"beats,":[78],"and":[79,152,169,178],"(3)":[80,97,170],"suppression":[81],"voice":[84,103,159,180,198,210],"(self-generated":[85],"sound)":[86],"in":[87,104],"sound":[88,174],"mixtures":[89,175],"recorded":[90],"The":[93],"main":[94],"issue":[95,129],"is":[96,123],"because":[98],"the":[99,107,110,117,128,139,193,196],"interference":[100],"causes":[106],"decrease":[108],"accuracy.":[113],"So":[114],"designed":[116],"architecture":[118],"for":[119],"music-understanding":[120],"capable":[124],"dealing":[126],"self-generated":[131],"sounds.":[132],"solve":[134],"issues,":[136],"took":[138],"following":[140],"approaches:":[141],"structure":[144],"prediction":[145],"based":[146,185],"on":[147,150,186],"knowledge":[149],"chords":[151],"drums,":[153],"speed":[155],"control":[156],"according":[160],"tempo":[163],"via":[164,181],"vocoder":[166],"called":[167],"STRAIGHT,":[168],"semi-blind":[171],"separation":[172],"into":[176],"an":[182],"adaptive":[183],"filter":[184],"ICA":[187],"(independent":[188],"component":[189],"analysis)":[190],"uses":[192],"waveform":[194],"as":[199],"prior":[201],"knowledge.":[202],"Experimental":[203],"result":[204],"showed":[205],"suppressing":[207],"robotpsilas":[208],"improved":[211],"capability.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
