{"id":"https://openalex.org/W7128606376","doi":"https://doi.org/10.48550/arxiv.2602.09295","title":"Positive-Unlabelled Active Learning to Curate a Dataset for Orca Resident Interpretation","display_name":"Positive-Unlabelled Active Learning to Curate a Dataset for Orca Resident Interpretation","publication_year":2026,"publication_date":"2026-02-10","ids":{"openalex":"https://openalex.org/W7128606376","doi":"https://doi.org/10.48550/arxiv.2602.09295"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.09295","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125637165","display_name":"Bret Nestor","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nestor, Bret","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125653275","display_name":"Bohan Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Bohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125655123","display_name":"Jasmine Moore","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moore, Jasmine","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5079532993","display_name":"Jasper Kanes","orcid":"https://orcid.org/0000-0002-5065-2301"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kanes, Jasper","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5125637165"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10659","display_name":"Marine animal studies overview","score":0.40549999475479126,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10659","display_name":"Marine animal studies overview","score":0.40549999475479126,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.3619000017642975,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11387","display_name":"Ichthyology and Marine Biology","score":0.038600001484155655,"subfield":{"id":"https://openalex.org/subfields/2309","display_name":"Nature and Landscape Conservation"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/endangered-species","display_name":"Endangered species","score":0.6133000254631042},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5529999732971191},{"id":"https://openalex.org/keywords/habitat","display_name":"Habitat","score":0.45019999146461487},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.4068000018596649},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.37630000710487366},{"id":"https://openalex.org/keywords/humpback-whale","display_name":"Humpback whale","score":0.33070001006126404}],"concepts":[{"id":"https://openalex.org/C179345059","wikidata":"https://www.wikidata.org/wiki/Q11394","display_name":"Endangered species","level":3,"score":0.6133000254631042},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5529999732971191},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5267000198364258},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5034000277519226},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4846999943256378},{"id":"https://openalex.org/C185933670","wikidata":"https://www.wikidata.org/wiki/Q52105","display_name":"Habitat","level":2,"score":0.45019999146461487},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.4068000018596649},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.3422999978065491},{"id":"https://openalex.org/C2777324915","wikidata":"https://www.wikidata.org/wiki/Q132905","display_name":"Humpback whale","level":3,"score":0.33070001006126404},{"id":"https://openalex.org/C10860467","wikidata":"https://www.wikidata.org/wiki/Q296519","display_name":"Ecotype","level":2,"score":0.31690001487731934},{"id":"https://openalex.org/C505870484","wikidata":"https://www.wikidata.org/wiki/Q180538","display_name":"Fishery","level":1,"score":0.3125},{"id":"https://openalex.org/C2776384079","wikidata":"https://www.wikidata.org/wiki/Q382441","display_name":"Marine mammal","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C2776840061","wikidata":"https://www.wikidata.org/wiki/Q149069","display_name":"Bottlenose dolphin","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C34951282","wikidata":"https://www.wikidata.org/wiki/Q864191","display_name":"Bioacoustics","level":2,"score":0.2574000060558319}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.09295","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.09295","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.09295","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.09295","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.5738950967788696}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,185],"work":[1],"presents":[2],"the":[3,34,134,209,216,219],"largest":[4],"curation":[5],"of":[6,40,46,57,72,78,85,109,125,141,146,152,160,165,171,180,218,225],"Southern":[7],"Resident":[8],"Killer":[9],"Whale":[10],"(SRKW)":[11],"acoustic":[12],"data":[13,32,154,212],"to":[14,53,96],"date,":[15],"also":[16],"containing":[17],"other":[18],"marine":[19,58,182],"mammals":[20],"in":[21,76],"their":[22],"environment.":[23],"We":[24,137],"systematically":[25],"search":[26,44],"all":[27,55],"available":[28,203,214],"public":[29],"archival":[30],"hydrophone":[31],"within":[33],"SRKW":[35,142,186],"habitat":[36,235],"(over":[37],"30":[38],"years":[39],"audio":[41,211],"data).":[42],"The":[43,60,83,198,222],"consists":[45],"a":[47,106,122],"weakly-supervised,":[48],"positive-unlabelled,":[49],"active":[50],"learning":[51],"strategy":[52],"identify":[54],"instances":[56],"mammals.":[59],"resulting":[61],"transformer-based":[62],"presence":[63],"or":[64],"absence":[65],"classifiers":[66,69],"outperform":[67],"state-of-the-art":[68],"on":[70,133],"3":[71],"4":[73,114],"expert-annotated":[74],"datasets":[75],"terms":[77],"accuracy":[79,108,124],"and":[80,117,176,195,208,238],"energy":[81],"efficiency.":[82],"fleet":[84],"WHISPER":[86],"detection":[87],"models":[88],"range":[89],"from":[90,155],"0.58":[91],"(0.48-0.67)":[92],"AUROC":[93],"with":[94,99],"WHISPER-tiny":[95],"0.77":[97],"(0.63-0.93)":[98],"WHISPER-large-v3.":[100],"Our":[101],"multiclass":[102],"species":[103,200],"classifier":[104,120],"obtains":[105,121],"top-1":[107,123],"53.2\\%":[110],"(11":[111],"train":[112,128],"classes,":[113,129],"test":[115,131],"classes)":[116,132],"our":[118],"ecotype":[119],"33.6\\%":[126],"(4":[127],"5":[130],"DCLDE-2026":[135],"dataset.":[136],"yield":[138],"919":[139],"hours":[140,145,151,159,164,170,179],"data,":[143,149,162,168,175],"230":[144],"Bigg's":[147],"orca":[148,153],"1374":[150],"unlabelled":[156],"ecotypes,":[157],"1501":[158],"humpback":[161],"88":[163],"sea":[166],"lion":[167],"246":[169],"pacific":[172],"white-sided":[173],"dolphin":[174],"over":[177],"784":[178],"unspecified":[181],"mammal":[183],"data.":[184],"dataset":[187,227],"is":[188],"larger":[189],"than":[190],"DCLDE-2026,":[191],"Ocean":[192],"Networks":[193],"Canada,":[194],"OrcaSound":[196],"combined.":[197],"curated":[199],"labels":[201],"are":[202,213],"under":[204,215],"CC-BY":[205],"4.0":[206],"license,":[207],"corresponding":[210],"licenses":[217],"original":[220],"owners.":[221],"comprehensive":[223],"nature":[224],"this":[226,242],"makes":[228],"it":[229],"suitable":[230],"for":[231,241],"unsupervised":[232],"machine":[233],"translation,":[234],"usage":[236],"surveys,":[237],"conservation":[239],"endeavours":[240],"critically":[243],"endangered":[244],"ecotype.":[245]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-02-12T00:00:00"}
