{"id":"https://openalex.org/W4417098555","doi":"https://doi.org/10.1016/j.ecoinf.2026.103765","title":"Foundation models for bioacoustics \u2013 A comparative review","display_name":"Foundation models for bioacoustics \u2013 A comparative review","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W4417098555","doi":"https://doi.org/10.1016/j.ecoinf.2026.103765"},"language":"en","primary_location":{"id":"doi:10.1016/j.ecoinf.2026.103765","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.ecoinf.2026.103765","pdf_url":null,"source":{"id":"https://openalex.org/S195809937","display_name":"Ecological Informatics","issn_l":"1574-9541","issn":["1574-9541","1878-0512"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ecological Informatics","raw_type":"journal-article"},"type":"review","indexed_in":["arxiv","crossref","datacite","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1016/j.ecoinf.2026.103765","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092645223","display_name":"Raphael Schwinger","orcid":"https://orcid.org/0009-0001-8519-3571"},"institutions":[{"id":"https://openalex.org/I3133086891","display_name":"Hochschule f\u00fcr Angewandte Wissenschaften Kiel","ror":"https://ror.org/03q0ab227","country_code":"DE","type":"education","lineage":["https://openalex.org/I3133086891"]},{"id":"https://openalex.org/I32021983","display_name":"Christian-Albrechts-Universit\u00e4t zu Kiel","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Raphael Schwinger","raw_affiliation_strings":["Intelligent Networked Systems (INS), Kiel University, Kiel, Germany"],"raw_orcid":"https://orcid.org/0009-0001-8519-3571","affiliations":[{"raw_affiliation_string":"Intelligent Networked Systems (INS), Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I3133086891","https://openalex.org/I32021983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120089847","display_name":"Paria Vali Zadeh","orcid":null},"institutions":[{"id":"https://openalex.org/I3133086891","display_name":"Hochschule f\u00fcr Angewandte Wissenschaften Kiel","ror":"https://ror.org/03q0ab227","country_code":"DE","type":"education","lineage":["https://openalex.org/I3133086891"]},{"id":"https://openalex.org/I32021983","display_name":"Christian-Albrechts-Universit\u00e4t zu Kiel","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Paria Vali Zadeh","raw_affiliation_strings":["Intelligent Networked Systems (INS), Kiel University, Kiel, Germany"],"raw_orcid":"https://orcid.org/0009-0007-8396-1585","affiliations":[{"raw_affiliation_string":"Intelligent Networked Systems (INS), Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I3133086891","https://openalex.org/I32021983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023925855","display_name":"Lukas Rauch","orcid":"https://orcid.org/0000-0002-6552-3270"},"institutions":[{"id":"https://openalex.org/I106157433","display_name":"University of Kassel","ror":"https://ror.org/04zc7p361","country_code":"DE","type":"education","lineage":["https://openalex.org/I106157433"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lukas Rauch","raw_affiliation_strings":["Intelligent Embedded Systems (IES), University of Kassel, Kassel, Germany"],"raw_orcid":"https://orcid.org/0000-0002-6552-3270","affiliations":[{"raw_affiliation_string":"Intelligent Embedded Systems (IES), University of Kassel, Kassel, Germany","institution_ids":["https://openalex.org/I106157433"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120697198","display_name":"Mats Kurz","orcid":null},"institutions":[{"id":"https://openalex.org/I3133086891","display_name":"Hochschule f\u00fcr Angewandte Wissenschaften Kiel","ror":"https://ror.org/03q0ab227","country_code":"DE","type":"education","lineage":["https://openalex.org/I3133086891"]},{"id":"https://openalex.org/I32021983","display_name":"Christian-Albrechts-Universit\u00e4t zu Kiel","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Mats Kurz","raw_affiliation_strings":["Intelligent Networked Systems (INS), Kiel University, Kiel, Germany"],"raw_orcid":"https://orcid.org/0009-0007-2527-9078","affiliations":[{"raw_affiliation_string":"Intelligent Networked Systems (INS), Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I3133086891","https://openalex.org/I32021983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120697199","display_name":"Tom Hauschild","orcid":null},"institutions":[{"id":"https://openalex.org/I3133086891","display_name":"Hochschule f\u00fcr Angewandte Wissenschaften Kiel","ror":"https://ror.org/03q0ab227","country_code":"DE","type":"education","lineage":["https://openalex.org/I3133086891"]},{"id":"https://openalex.org/I32021983","display_name":"Christian-Albrechts-Universit\u00e4t zu Kiel","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tom Hauschild","raw_affiliation_strings":["Intelligent Networked Systems (INS), Kiel University, Kiel, Germany"],"raw_orcid":"https://orcid.org/0009-0002-8928-0477","affiliations":[{"raw_affiliation_string":"Intelligent Networked Systems (INS), Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I3133086891","https://openalex.org/I32021983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067680832","display_name":"Sam Lapp","orcid":"https://orcid.org/0000-0003-1637-6822"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sam Lapp","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0003-1637-6822","affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082220265","display_name":"Sven Tomforde","orcid":"https://orcid.org/0000-0002-5825-8915"},"institutions":[{"id":"https://openalex.org/I3133086891","display_name":"Hochschule f\u00fcr Angewandte Wissenschaften Kiel","ror":"https://ror.org/03q0ab227","country_code":"DE","type":"education","lineage":["https://openalex.org/I3133086891"]},{"id":"https://openalex.org/I32021983","display_name":"Christian-Albrechts-Universit\u00e4t zu Kiel","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sven Tomforde","raw_affiliation_strings":["Intelligent Networked Systems (INS), Kiel University, Kiel, Germany"],"raw_orcid":"https://orcid.org/0000-0002-5825-8915","affiliations":[{"raw_affiliation_string":"Intelligent Networked Systems (INS), Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I3133086891","https://openalex.org/I32021983"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5092645223"],"corresponding_institution_ids":["https://openalex.org/I3133086891","https://openalex.org/I32021983"],"apc_list":{"value":2510,"currency":"USD","value_usd":2510},"apc_paid":{"value":2510,"currency":"USD","value_usd":2510},"fwci":18.8485,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.9806867,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"96","issue":null,"first_page":"103765","last_page":"103765"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.00019999999494757503,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12644","display_name":"Wildlife-Road Interactions and Conservation","score":0.0,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bioacoustics","display_name":"Bioacoustics","score":0.9521999955177307},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5471000075340271},{"id":"https://openalex.org/keywords/transferability","display_name":"Transferability","score":0.4724999964237213},{"id":"https://openalex.org/keywords/phonocardiogram","display_name":"Phonocardiogram","score":0.461899995803833},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.3955000042915344},{"id":"https://openalex.org/keywords/linear-model","display_name":"Linear model","score":0.35690000653266907},{"id":"https://openalex.org/keywords/biological-classification","display_name":"Biological classification","score":0.3508000075817108}],"concepts":[{"id":"https://openalex.org/C34951282","wikidata":"https://www.wikidata.org/wiki/Q864191","display_name":"Bioacoustics","level":2,"score":0.9521999955177307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5623999834060669},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5471000075340271},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5026000142097473},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4853000044822693},{"id":"https://openalex.org/C61272859","wikidata":"https://www.wikidata.org/wiki/Q7834031","display_name":"Transferability","level":3,"score":0.4724999964237213},{"id":"https://openalex.org/C159693508","wikidata":"https://www.wikidata.org/wiki/Q3301075","display_name":"Phonocardiogram","level":2,"score":0.461899995803833},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.3955000042915344},{"id":"https://openalex.org/C163175372","wikidata":"https://www.wikidata.org/wiki/Q3339222","display_name":"Linear model","level":2,"score":0.35690000653266907},{"id":"https://openalex.org/C48702757","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Biological classification","level":2,"score":0.3508000075817108},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.33219999074935913},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C2992428333","wikidata":"https://www.wikidata.org/wiki/Q11461","display_name":"Sound production","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2874000072479248},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2669000029563904},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1016/j.ecoinf.2026.103765","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.ecoinf.2026.103765","pdf_url":null,"source":{"id":"https://openalex.org/S195809937","display_name":"Ecological Informatics","issn_l":"1574-9541","issn":["1574-9541","1878-0512"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ecological Informatics","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2508.01277","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01277","pdf_url":"https://arxiv.org/pdf/2508.01277","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2508.01277","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.01277","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.5281/zenodo.19608334","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.19608334","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"thesis"},{"id":"doi:10.5281/zenodo.19608335","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.19608335","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"thesis"}],"best_oa_location":{"id":"doi:10.1016/j.ecoinf.2026.103765","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.ecoinf.2026.103765","pdf_url":null,"source":{"id":"https://openalex.org/S195809937","display_name":"Ecological Informatics","issn_l":"1574-9541","issn":["1574-9541","1878-0512"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ecological Informatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320323803","display_name":"Bundesministerium f\u00fcr Wirtschaft und Energie","ror":"https://ror.org/02vgg2808"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automated":[0],"bioacoustic":[1,20,31,41,49,64,199,206,261],"analysis":[2,102],"is":[3,130,156],"essential":[4],"for":[5,158,194,260],"biodiversity":[6],"monitoring":[7],"and":[8,34,57,74,90,97,114,139,166,214,223,228,238],"conservation,":[9],"requiring":[10],"advanced":[11],"deep":[12],"learning":[13,51,174],"models":[14,33,86,170,182,196,208,259],"that":[15,104,128,153,167],"can":[16],"adapt":[17],"to":[18,197],"diverse":[19,124,245],"tasks.":[21,43,201,262],"This":[22],"article":[23],"presents":[24],"a":[25],"comprehensive":[26],"review":[27,63,204],"of":[28,48,84,151,163,205],"large-scale":[29],"pretrained":[30],"foundation":[32,65,207],"systematically":[35],"investigates":[36],"their":[37,68],"transferability":[38],"across":[39],"multiple":[40],"classification":[42,200],"We":[44],"provide":[45,192],"an":[46,80],"overview":[47],"representation":[50],"by":[52],"analysing":[53],"pretraining":[54],"data":[55],"sources":[56],"benchmarks.":[58],"On":[59],"this":[60],"basis,":[61],"we":[62,78],"models,":[66],"dissecting":[67],"training":[69,75,210,215],"data,":[70,211],"preprocessing,":[71,212],"augmentations,":[72],"architecture,":[73,213],"paradigms.":[76,216],"Additionally,":[77],"conduct":[79],"extensive":[81],"empirical":[82,219],"study":[83],"selected":[85],"on":[87,120,123,137,141,175,183,221,242],"the":[88,108,115,131,149,160],"BEANS":[89,142,184,222,243],"BirdSet":[91,110,138,224,236],"benchmarks,":[92],"evaluating":[93],"generalisability":[94],"under":[95,226],"linear":[96,117,227,240],"attentive":[98,154,188,229],"probing.":[99,189,230],"Our":[100],"experimental":[101],"reveals":[103],"Perch":[105,232],"2.0":[106,233],"achieves":[107,234],"highest":[109,235],"score":[111,237],"(restricted":[112],"evaluation)":[113],"strongest":[116,239],"probing":[118,155,241,251],"result":[119],"BEANS,":[121],"building":[122],"multi-taxa":[125,246],"supervised":[126,247],"pretraining;":[127],"BirdMAE":[129],"best":[132],"model":[133],"among":[134],"probing-based":[135],"strategies":[136],"second":[140],"after":[143],"BEATs":[144],"N":[145],"L":[146],"M":[147],",":[148],"encoder":[150],"NatureLM-audio;":[152],"beneficial":[157],"extracting":[159],"full":[161],"performance":[162,254],"transformer-based":[164,256],"models;":[165],"general-purpose":[168],"audio":[169,258],"trained":[171],"with":[172,187],"self-supervised":[173],"AudioSet":[176],"outperform":[177],"many":[178],"specialised":[179],"bird":[180],"sound":[181],"when":[185],"evaluated":[186],"These":[190],"findings":[191],"guidance":[193],"adapting":[195],"new":[198],"\u2022":[202,217,231,249],"Comprehensive":[203],"covering":[209],"Extensive":[218],"comparison":[220],"benchmarks":[225],"through":[244],"pretraining.":[248],"Attentive":[250],"unlocks":[252],"superior":[253],"from":[255],"general":[257]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-05-29T09:21:14.243279","created_date":"2025-10-10T00:00:00"}
