{"id":"https://openalex.org/W4362650921","doi":"https://doi.org/10.1109/aciiw57231.2022.10085991","title":"Jointly Predicting Emotion, Age, and Country Using Pre-Trained Acoustic Embedding","display_name":"Jointly Predicting Emotion, Age, and Country Using Pre-Trained Acoustic Embedding","publication_year":2022,"publication_date":"2022-10-18","ids":{"openalex":"https://openalex.org/W4362650921","doi":"https://doi.org/10.1109/aciiw57231.2022.10085991"},"language":"en","primary_location":{"id":"doi:10.1109/aciiw57231.2022.10085991","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aciiw57231.2022.10085991","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 10th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065879041","display_name":"Bagus Tris Atmaja","orcid":"https://orcid.org/0000-0003-1560-2824"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Bagus Tris Atmaja","raw_affiliation_strings":["AIST,Tsukuba,Japan","AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AIST,Tsukuba,Japan","institution_ids":["https://openalex.org/I73613424"]},{"raw_affiliation_string":"AIST, Tsukuba, Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040828453","display_name":"Zanjabila","orcid":null},"institutions":[{"id":"https://openalex.org/I166843116","display_name":"Sepuluh Nopember Institute of Technology","ror":"https://ror.org/05kbmmt89","country_code":"ID","type":"education","lineage":["https://openalex.org/I166843116"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Zanjabila","raw_affiliation_strings":["ITS,Surabaya,Indonesia","ITS, Surabaya, Indonesia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ITS,Surabaya,Indonesia","institution_ids":["https://openalex.org/I166843116"]},{"raw_affiliation_string":"ITS, Surabaya, Indonesia","institution_ids":["https://openalex.org/I166843116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046057045","display_name":"Akira Sasou","orcid":"https://orcid.org/0000-0003-1700-0325"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Akira Sasou","raw_affiliation_strings":["AIST,Tsukuba,Japan","AIST, Tsukuba, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AIST,Tsukuba,Japan","institution_ids":["https://openalex.org/I73613424"]},{"raw_affiliation_string":"AIST, Tsukuba, Japan","institution_ids":["https://openalex.org/I73613424"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5388,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.83414648,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paralanguage","display_name":"Paralanguage","score":0.7718039751052856},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7074490785598755},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6603430509567261},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6130010485649109},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.5716640949249268},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5438218712806702},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5212681293487549},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5155446529388428},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4584704041481018},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.42575889825820923},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3721087574958801},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12698251008987427},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11524853110313416},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.0985860526561737},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.07623368501663208},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07599499821662903}],"concepts":[{"id":"https://openalex.org/C133378560","wikidata":"https://www.wikidata.org/wiki/Q1753225","display_name":"Paralanguage","level":2,"score":0.7718039751052856},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7074490785598755},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6603430509567261},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6130010485649109},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.5716640949249268},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5438218712806702},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5212681293487549},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5155446529388428},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4584704041481018},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.42575889825820923},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3721087574958801},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12698251008987427},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11524853110313416},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0985860526561737},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.07623368501663208},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07599499821662903},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aciiw57231.2022.10085991","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aciiw57231.2022.10085991","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 10th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2625397849","https://openalex.org/W2742542661","https://openalex.org/W2747172199","https://openalex.org/W2902623517","https://openalex.org/W2905841571","https://openalex.org/W2939274787","https://openalex.org/W2964128364","https://openalex.org/W3015988193","https://openalex.org/W3036601975","https://openalex.org/W3082167223","https://openalex.org/W3088876910","https://openalex.org/W3096761643","https://openalex.org/W3162538529","https://openalex.org/W3198528147","https://openalex.org/W3198771897","https://openalex.org/W4221089191","https://openalex.org/W4250482878","https://openalex.org/W4251867726","https://openalex.org/W4285251897","https://openalex.org/W4297697709","https://openalex.org/W4304098636","https://openalex.org/W4361994820","https://openalex.org/W4362650920","https://openalex.org/W6638667902","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W2064370490","https://openalex.org/W3166813893","https://openalex.org/W2910013580","https://openalex.org/W2391900574","https://openalex.org/W3200958703","https://openalex.org/W1990078780","https://openalex.org/W3108667266","https://openalex.org/W2376619307","https://openalex.org/W3020211967","https://openalex.org/W3042022268"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"demonstrated":[4],"the":[5,41,71,91,104],"benefit":[6],"of":[7,73],"using":[8],"a":[9,57,79,109],"pre-trained":[10,29],"model":[11,30,39,110],"to":[12,16,69,90],"extract":[13],"acoustic":[14,101,105],"embedding":[15,106],"jointly":[17],"predict":[18],"(multitask":[19],"learning)":[20],"three":[21,65],"tasks:":[22],"emotion,":[23],"age,":[24],"and":[25,37,47,86,121],"native":[26],"country.":[27],"The":[28,45,76],"was":[31,56,67,78],"trained":[32,111],"with":[33,82],"wav2vec":[34],"2.0":[35],"large":[36],"robust":[38],"on":[40,99,112],"speech":[42,115],"emotion":[43,46],"corpus.":[44],"age":[48],"tasks":[49],"were":[50],"regression":[51],"problems,":[52],"while":[53],"country":[54],"prediction":[55],"classification":[58],"task.":[59],"A":[60],"single":[61],"harmonic":[62],"mean":[63],"from":[64,108,128],"metrics":[66],"used":[68],"evaluate":[70],"performance":[72],"multitask":[74,97],"learning.":[75],"classifier":[77],"linear":[80],"network":[81],"two":[83],"independent":[84],"layers":[85,88],"shared":[87],"connected":[89],"output":[92],"layers.":[93],"This":[94],"study":[95],"explores":[96],"learning":[98],"different":[100],"features":[102],"(including":[103],"extracted":[107],"an":[113],"affective":[114],"dataset),":[116],"seed":[117],"numbers,":[118],"batch":[119],"sizes,":[120],"waveform":[122],"normalizations":[123],"for":[124],"predicting":[125],"paralinguistic":[126],"information":[127],"speech.":[129]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
