{"id":"https://openalex.org/W4408352732","doi":"https://doi.org/10.1109/icassp49660.2025.10889639","title":"Hybrid Losses for Hierarchical Embedding Learning","display_name":"Hybrid Losses for Hierarchical Embedding Learning","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408352732","doi":"https://doi.org/10.1109/icassp49660.2025.10889639"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889639","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889639","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107126595","display_name":"Haokun Tian","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Haokun Tian","raw_affiliation_strings":["Queen Mary University of London,Center for Digital Music,London,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,Center for Digital Music,London,UK","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053442723","display_name":"Stefan Lattner","orcid":"https://orcid.org/0000-0002-3945-7580"},"institutions":[{"id":"https://openalex.org/I4210131385","display_name":"Sony (France)","ror":"https://ror.org/03cr99w51","country_code":"FR","type":"company","lineage":["https://openalex.org/I4210131385","https://openalex.org/I4210143797"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Stefan Lattner","raw_affiliation_strings":["Sony Computer Science Laboratories,Music Team,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony Computer Science Laboratories,Music Team,Paris,France","institution_ids":["https://openalex.org/I4210131385"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010404092","display_name":"Brian McFee","orcid":"https://orcid.org/0000-0001-6261-9747"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian McFee","raw_affiliation_strings":["New York University,Music and Audio Research Laboratory,New York,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"New York University,Music and Audio Research Laboratory,New York,USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083980012","display_name":"Charalampos Saitis","orcid":"https://orcid.org/0000-0002-6860-9723"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Charalampos Saitis","raw_affiliation_strings":["Queen Mary University of London,Center for Digital Music,London,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,Center for Digital Music,London,UK","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02929242,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.7778000235557556,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.7778000235557556,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.6636999845504761,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6268576979637146},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6099507808685303},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35211628675460815}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6268576979637146},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6099507808685303},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35211628675460815}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889639","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889639","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2038484192","https://openalex.org/W2096733369","https://openalex.org/W2108598243","https://openalex.org/W2593116425","https://openalex.org/W2606611007","https://openalex.org/W2618530766","https://openalex.org/W2936044260","https://openalex.org/W2963350250","https://openalex.org/W2964189431","https://openalex.org/W2975043431","https://openalex.org/W3035102141","https://openalex.org/W3035406632","https://openalex.org/W3094550259","https://openalex.org/W3196974791","https://openalex.org/W3205139495","https://openalex.org/W4224918094","https://openalex.org/W4226442948","https://openalex.org/W4372260310","https://openalex.org/W4372266552","https://openalex.org/W4372346433","https://openalex.org/W4382998590","https://openalex.org/W4392576936","https://openalex.org/W6739076403","https://openalex.org/W6780333821","https://openalex.org/W6791353385","https://openalex.org/W6798064515","https://openalex.org/W6846591948","https://openalex.org/W6860598917"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2081900870","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890"],"abstract_inverted_index":{"In":[0],"traditional":[1],"supervised":[2],"learning,":[3],"the":[4,13,21,58,64,101],"cross-entropy":[5,41],"loss":[6],"treats":[7],"all":[8],"incorrect":[9],"predictions":[10],"equally,":[11],"ignoring":[12],"relevance":[14],"or":[15],"proximity":[16],"of":[17],"wrong":[18],"labels":[19,47],"to":[20,43,56,67,69,74,81],"correct":[22],"answer.":[23],"By":[24],"leveraging":[25],"a":[26,49,88],"tree":[27],"hierarchy":[28],"for":[29,78],"fine-grained":[30],"labels,":[31],"we":[32],"investigate":[33],"hybrid":[34,103],"losses,":[35,42],"such":[36],"as":[37],"generalised":[38],"triplet":[39],"and":[40,62,114],"enforce":[44],"similarity":[45],"between":[46],"within":[48],"multi-task":[50],"learning":[51],"framework.":[52],"We":[53],"propose":[54],"metrics":[55],"evaluate":[57],"embedding":[59,111],"space":[60,112],"structure":[61],"assess":[63],"model\u2019s":[65],"ability":[66],"generalise":[68],"unseen":[70,82],"classes,":[71],"that":[72,100],"is,":[73],"infer":[75],"similar":[76],"classes":[77],"data":[79],"belonging":[80],"categories.":[83],"Our":[84],"experiments":[85],"on":[86],"OrchideaSOL,":[87],"four-level":[89],"hierarchical":[90],"instrument":[91],"sound":[92],"dataset":[93],"with":[94],"nearly":[95],"200":[96],"detailed":[97],"categories,":[98],"demonstrate":[99],"proposed":[102],"losses":[104],"outperform":[105],"previous":[106],"works":[107],"in":[108],"classification,":[109],"retrieval,":[110],"structure,":[113],"generalisation.":[115]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
