{"id":"https://openalex.org/W4372259780","doi":"https://doi.org/10.1109/icassp49357.2023.10095294","title":"Large-Scale Nonverbal Vocalization Detection Using Transformers","display_name":"Large-Scale Nonverbal Vocalization Detection Using Transformers","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372259780","doi":"https://doi.org/10.1109/icassp49357.2023.10095294"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095294","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095294","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045277971","display_name":"Panagiotis Tzirakis","orcid":"https://orcid.org/0000-0001-9449-5339"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Panagiotis Tzirakis","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030891676","display_name":"Alice Baird","orcid":"https://orcid.org/0000-0002-7003-5650"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alice Baird","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012982350","display_name":"Jeffrey A. Brooks","orcid":"https://orcid.org/0000-0002-6370-3622"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Brooks","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","University of California, Berkeley, California, USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"University of California, Berkeley, California, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067816902","display_name":"Chris Gagne","orcid":"https://orcid.org/0000-0003-2241-5285"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Gagne","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024696407","display_name":"Lauren Kim","orcid":"https://orcid.org/0000-0001-9844-247X"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lauren Kim","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042938744","display_name":"Michael Opara","orcid":"https://orcid.org/0000-0001-7441-7026"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Opara","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109576792","display_name":"Christopher Gregory","orcid":null},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Gregory","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024479964","display_name":"Jacob Metrick","orcid":null},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jacob Metrick","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026856154","display_name":"Garrett Boseck","orcid":null},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Garrett Boseck","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044184636","display_name":"Vineet Tiruvadi","orcid":"https://orcid.org/0000-0001-5071-7091"},"institutions":[{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vineet Tiruvadi","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043060302","display_name":"Bj\u00f6rn W. Schuller","orcid":"https://orcid.org/0000-0002-6478-8699"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Bj\u00f6rn Schuller","raw_affiliation_strings":["GLAM &#x2013; Group on Language, Audio &#x0026; Music, Imperial College,London,UK"],"affiliations":[{"raw_affiliation_string":"GLAM &#x2013; Group on Language, Audio &#x0026; Music, Imperial College,London,UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037853738","display_name":"Dacher Keltner","orcid":"https://orcid.org/0000-0001-9061-5292"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dacher Keltner","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","University of California, Berkeley, California, USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"University of California, Berkeley, California, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039945153","display_name":"Alan Cowen","orcid":"https://orcid.org/0000-0002-8381-5883"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alan Cowen","raw_affiliation_strings":["Hume AI Inc.,New York City,New York,USA","University of California, Berkeley, California, USA","Hume AI Inc., New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Hume AI Inc.,New York City,New York,USA","institution_ids":["https://openalex.org/I174216632"]},{"raw_affiliation_string":"University of California, Berkeley, California, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Hume AI Inc., New York City, New York, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5045277971"],"corresponding_institution_ids":["https://openalex.org/I174216632"],"apc_list":null,"apc_paid":null,"fwci":2.3058,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.89034572,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nonverbal-communication","display_name":"Nonverbal communication","score":0.7799665927886963},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5482396483421326},{"id":"https://openalex.org/keywords/laughter","display_name":"Laughter","score":0.5309127569198608},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4173702895641327},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.4034360349178314},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3937073349952698},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.37931370735168457},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.2566918730735779},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08318930864334106}],"concepts":[{"id":"https://openalex.org/C145633318","wikidata":"https://www.wikidata.org/wiki/Q207125","display_name":"Nonverbal communication","level":2,"score":0.7799665927886963},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5482396483421326},{"id":"https://openalex.org/C2780775679","wikidata":"https://www.wikidata.org/wiki/Q170579","display_name":"Laughter","level":2,"score":0.5309127569198608},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4173702895641327},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4034360349178314},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3937073349952698},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.37931370735168457},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2566918730735779},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08318930864334106},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095294","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095294","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2081793746","https://openalex.org/W2085662862","https://openalex.org/W2332267591","https://openalex.org/W2593116425","https://openalex.org/W2889342907","https://openalex.org/W2892071465","https://openalex.org/W2895625244","https://openalex.org/W2959143276","https://openalex.org/W3036601975","https://openalex.org/W3158929093","https://openalex.org/W3196831814","https://openalex.org/W3197642003","https://openalex.org/W3197970772","https://openalex.org/W4229039499","https://openalex.org/W4254718357","https://openalex.org/W4283366505","https://openalex.org/W4283661339","https://openalex.org/W4283703738","https://openalex.org/W4297944245","https://openalex.org/W4311000453","https://openalex.org/W6703049012","https://openalex.org/W6755082579","https://openalex.org/W6780218876","https://openalex.org/W6810693714","https://openalex.org/W6839427101","https://openalex.org/W6839551726","https://openalex.org/W6847363464","https://openalex.org/W6991109109"],"related_works":["https://openalex.org/W2760111529","https://openalex.org/W4304693785","https://openalex.org/W3081604601","https://openalex.org/W2103493245","https://openalex.org/W4238600753","https://openalex.org/W4298012685","https://openalex.org/W2391280397","https://openalex.org/W2174919855","https://openalex.org/W4385559915","https://openalex.org/W2063843262"],"abstract_inverted_index":{"Detecting":[0],"emotionally":[1],"expressive":[2],"nonverbal":[3,54,124],"vocalizations":[4,55,64,78],"is":[5],"essential":[6],"to":[7,87,118,129],"developing":[8],"technologies":[9],"that":[10,42,65,150],"can":[11],"converse":[12],"fluently":[13],"with":[14],"humans.":[15],"The":[16,72],"affective":[17],"computing":[18],"community":[19],"has":[20,79],"largely":[21,81],"focused":[22],"on":[23,106],"understanding":[24],"the":[25,35,88,103,116,119,144,161,178],"intonation":[26],"of":[27,37,70,74,91,98,121,136,156,163,185],"emotional":[28,39,77,137],"speech":[29,51],"and":[30,61,146,171,173],"language.":[31],"However,":[32],"advances":[33],"in":[34,68,102],"study":[36],"vocal":[38],"behavior":[40],"suggest":[41],"emotions":[43],"may":[44],"be":[45],"more":[46,152],"readily":[47],"conveyed":[48],"not":[49],"by":[50,53,83],"but":[52],"such":[56,76,168],"as":[57,131,133,169],"laughs,":[58],"sighs,":[59],"shrieks,":[60],"grunts":[62],"\u2013":[63],"often":[66],"occur":[67],"lieu":[69],"speech.":[71],"task":[73,179],"detecting":[75,107],"been":[80],"overlooked":[82],"researchers,":[84],"likely":[85],"due":[86],"limited":[89],"availability":[90],"data":[92],"capturing":[93],"a":[94],"sufficiently":[95],"wide":[96],"variety":[97],"vocalizations.":[99,138],"Most":[100],"studies":[101],"literature":[104],"focus":[105],"laughter":[108],"or":[109],"cries.":[110],"In":[111],"this":[112],"paper,":[113],"we":[114,142],"present":[115],"first,":[117],"best":[120],"our":[122,140],"knowledge,":[123],"vocalization":[125],"detection":[126],"model":[127],"trained":[128],"detect":[130],"many":[132],"67":[134],"types":[135,184],"For":[139],"purposes,":[141],"use":[143,162],"large-scale":[145],"in-the-wild":[147],"HUME-VB":[148],"dataset":[149],"provides":[151],"than":[153],"156":[154],"h":[155],"data.":[157],"We":[158],"thoroughly":[159],"investigate":[160],"pre-trained":[164],"audio":[165],"transformer":[166],"models,":[167],"Wav2Vec2":[170],"Whisper,":[172],"provide":[174],"useful":[175],"insights":[176],"for":[177],"at":[180],"hand":[181],"using":[182],"different":[183],"noise":[186],"signals.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
