{"id":"https://openalex.org/W3015641245","doi":"https://doi.org/10.1109/icassp40776.2020.9053798","title":"Pitch Estimation Via Self-Supervision","display_name":"Pitch Estimation Via Self-Supervision","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015641245","doi":"https://doi.org/10.1109/icassp40776.2020.9053798","mag":"3015641245"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053798","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053798","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002559095","display_name":"Beat Gfeller","orcid":"https://orcid.org/0000-0002-1321-9015"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Beat Gfeller","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038357639","display_name":"Christian Frank","orcid":"https://orcid.org/0000-0002-1930-5571"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christian Frank","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011258273","display_name":"Dominik Roblek","orcid":"https://orcid.org/0000-0002-2155-450X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dominik Roblek","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081709307","display_name":"Matt Sharifi","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matt Sharifi","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033341878","display_name":"Marco Tagliasacchi","orcid":"https://orcid.org/0000-0002-7682-6795"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marco Tagliasacchi","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062310298","display_name":"Mihajlo Velimirovi\u0107","orcid":"https://orcid.org/0000-0002-5588-6828"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mihajlo Velimirovic","raw_affiliation_strings":["Google Research"],"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002559095"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.7577,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.69664253,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"17","issue":null,"first_page":"3527","last_page":"3531"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7772698402404785},{"id":"https://openalex.org/keywords/pitch-detection-algorithm","display_name":"Pitch detection algorithm","score":0.6809931993484497},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5974975824356079},{"id":"https://openalex.org/keywords/voice","display_name":"Voice","score":0.5922090411186218},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5113441348075867},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5089035034179688},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.4588845372200012},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.45410406589508057},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40970414876937866},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3408527970314026},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.22935602068901062},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08790022134780884}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7772698402404785},{"id":"https://openalex.org/C135622632","wikidata":"https://www.wikidata.org/wiki/Q7198851","display_name":"Pitch detection algorithm","level":3,"score":0.6809931993484497},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5974975824356079},{"id":"https://openalex.org/C552089266","wikidata":"https://www.wikidata.org/wiki/Q494510","display_name":"Voice","level":2,"score":0.5922090411186218},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5113441348075867},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5089035034179688},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.4588845372200012},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.45410406589508057},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40970414876937866},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3408527970314026},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.22935602068901062},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08790022134780884},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053798","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053798","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W198589515","https://openalex.org/W1522301498","https://openalex.org/W1791774586","https://openalex.org/W1975079546","https://openalex.org/W1976069042","https://openalex.org/W2034300186","https://openalex.org/W2034995162","https://openalex.org/W2040864563","https://openalex.org/W2046033161","https://openalex.org/W2091425152","https://openalex.org/W2097423688","https://openalex.org/W2107328928","https://openalex.org/W2107831318","https://openalex.org/W2108771579","https://openalex.org/W2118774185","https://openalex.org/W2124539664","https://openalex.org/W2135431242","https://openalex.org/W2161632835","https://openalex.org/W2296724634","https://openalex.org/W2603256597","https://openalex.org/W2606176153","https://openalex.org/W2612464443","https://openalex.org/W2621177543","https://openalex.org/W2755000466","https://openalex.org/W2774581901","https://openalex.org/W2891901715","https://openalex.org/W2951535099","https://openalex.org/W2962866891","https://openalex.org/W2964121744","https://openalex.org/W3124061379","https://openalex.org/W4232595861","https://openalex.org/W4289563769","https://openalex.org/W6607999264","https://openalex.org/W6631190155","https://openalex.org/W6676245417","https://openalex.org/W6679739227","https://openalex.org/W6697040288","https://openalex.org/W6736723571","https://openalex.org/W6746960179","https://openalex.org/W6754881664","https://openalex.org/W6788931238"],"related_works":["https://openalex.org/W2036753972","https://openalex.org/W1996861825","https://openalex.org/W2117854015","https://openalex.org/W2044564457","https://openalex.org/W1548784694","https://openalex.org/W2111139066","https://openalex.org/W1714737510","https://openalex.org/W2387387595","https://openalex.org/W284503395","https://openalex.org/W2400828047"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,107,144],"method":[3,118],"to":[4,14,20,50,56,123,142],"estimate":[5],"the":[6,47,59,76,79,87,92,131,149],"fundamental":[7],"frequency":[8],"in":[9,91,148],"monophonic":[10,128],"audio,":[11,129],"often":[12],"referred":[13],"as":[15],"pitch":[16,52,73,89,108,116],"estimation.":[17],"In":[18,137],"contrast":[19],"existing":[21],"methods,":[22],"our":[23,115],"neural":[24],"network":[25,48],"can":[26],"be":[27],"fully":[28,124],"trained":[29],"only":[30],"on":[31,127],"unlabeled":[32],"data,":[33],"using":[34,154],"self-supervision.":[35],"A":[36],"tiny":[37],"amount":[38],"of":[39],"labeled":[40,135],"data":[41],"is":[42,58,82],"needed":[43],"solely":[44],"for":[45,133],"mapping":[46],"outputs":[49,81],"absolute":[51],"values.":[53],"The":[54],"key":[55],"this":[57,97],"observation":[60],"that":[61,114],"if":[62],"one":[63,68],"creates":[64],"two":[65],"examples":[66],"from":[67],"original":[69,93],"audio":[70],"clip":[71],"by":[72],"shifting":[74],"both,":[75],"difference":[77],"between":[78],"correct":[80],"known,":[83],"without":[84,130,153],"even":[85],"knowing":[86],"actual":[88],"value":[90],"clip.":[94],"Somewhat":[95],"surprisingly,":[96],"idea":[98],"combined":[99],"with":[100],"an":[101,120],"auxiliary":[102],"reconstruction":[103],"loss":[104],"allows":[105],"training":[106],"estimation":[109,117],"model.":[110],"Our":[111],"results":[112],"show":[113],"obtains":[119],"accuracy":[121],"comparable":[122],"supervised":[125],"models":[126],"need":[132],"large":[134],"datasets.":[136],"addition,":[138],"we":[139],"are":[140],"able":[141],"train":[143],"voicing":[145],"detection":[146],"output":[147],"same":[150],"model,":[151],"again":[152],"any":[155],"labels.":[156]},"counts_by_year":[{"year":2024,"cited_by_count":20},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
