{"id":"https://openalex.org/W4283764477","doi":"https://doi.org/10.1109/tnnls.2022.3184821","title":"Polarity Loss: Improving Visual-Semantic Alignment for Zero-Shot Detection","display_name":"Polarity Loss: Improving Visual-Semantic Alignment for Zero-Shot Detection","publication_year":2022,"publication_date":"2022-06-30","ids":{"openalex":"https://openalex.org/W4283764477","doi":"https://doi.org/10.1109/tnnls.2022.3184821","pmid":"https://pubmed.ncbi.nlm.nih.gov/35771782"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2022.3184821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3184821","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007895165","display_name":"Shafin Rahman","orcid":"https://orcid.org/0000-0001-7169-0318"},"institutions":[{"id":"https://openalex.org/I157386601","display_name":"North South University","ror":"https://ror.org/05wdbfp45","country_code":"BD","type":"education","lineage":["https://openalex.org/I157386601"]}],"countries":["BD"],"is_corresponding":true,"raw_author_name":"Shafin Rahman","raw_affiliation_strings":["Department of Electrical and Computer Engineering, North South University, Dhaka, Bangladesh"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, North South University, Dhaka, Bangladesh","institution_ids":["https://openalex.org/I157386601"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000300751","display_name":"Salman Khan","orcid":"https://orcid.org/0000-0002-9502-1749"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]},{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE","AU"],"is_corresponding":false,"raw_author_name":"Salman Khan","raw_affiliation_strings":["Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, United Arab Emirates","Research School of Engineering, The Australian National University, Canberra, ACT, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence, Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]},{"raw_affiliation_string":"Research School of Engineering, The Australian National University, Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072837153","display_name":"Nick Barnes","orcid":"https://orcid.org/0000-0002-9343-9535"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Nick Barnes","raw_affiliation_strings":["School of Computing, The Australian National University, Canberra, ACT, Australia"],"affiliations":[{"raw_affiliation_string":"School of Computing, The Australian National University, Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007895165"],"corresponding_institution_ids":["https://openalex.org/I157386601"],"apc_list":null,"apc_paid":null,"fwci":1.7936,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.87110276,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"36","issue":"3","first_page":"4066","last_page":"4078"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7154845595359802},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6416635513305664},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6004700064659119},{"id":"https://openalex.org/keywords/pascal","display_name":"Pascal (unit)","score":0.5716811418533325},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.5364686846733093},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5355634093284607},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5190187692642212},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4555770456790924},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4544166028499603},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.35718271136283875},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12495136260986328},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11813926696777344}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7154845595359802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6416635513305664},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6004700064659119},{"id":"https://openalex.org/C75608658","wikidata":"https://www.wikidata.org/wiki/Q44395","display_name":"Pascal (unit)","level":2,"score":0.5716811418533325},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.5364686846733093},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5355634093284607},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5190187692642212},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4555770456790924},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4544166028499603},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.35718271136283875},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12495136260986328},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11813926696777344},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tnnls.2022.3184821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3184821","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:35771782","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35771782","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null},{"id":"pmh:oai:openresearch-repository.anu.edu.au:1885/733751973","is_oa":false,"landing_page_url":"https://hdl.handle.net/1885/733751973","pdf_url":null,"source":{"id":"https://openalex.org/S4306402539","display_name":"ANU Open Research (Australian National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118347636","host_organization_name":"Australian National University","host_organization_lineage":["https://openalex.org/I118347636"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"Journal article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.46000000834465027,"display_name":"Peace, Justice and strong institutions"},{"id":"https://metadata.un.org/sdg/10","score":0.41999998688697815,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W7746136","https://openalex.org/W129091472","https://openalex.org/W146900863","https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W1875842236","https://openalex.org/W2007972815","https://openalex.org/W2031489346","https://openalex.org/W2081580037","https://openalex.org/W2098411764","https://openalex.org/W2128532956","https://openalex.org/W2193145675","https://openalex.org/W2194775991","https://openalex.org/W2209594346","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2334493732","https://openalex.org/W2441043183","https://openalex.org/W2552383788","https://openalex.org/W2565639579","https://openalex.org/W2570343428","https://openalex.org/W2604808168","https://openalex.org/W2607642691","https://openalex.org/W2611632661","https://openalex.org/W2724492314","https://openalex.org/W2747053578","https://openalex.org/W2783837693","https://openalex.org/W2789366140","https://openalex.org/W2884561390","https://openalex.org/W2887567284","https://openalex.org/W2888728082","https://openalex.org/W2894651257","https://openalex.org/W2905535961","https://openalex.org/W2963052338","https://openalex.org/W2963097641","https://openalex.org/W2963150001","https://openalex.org/W2963499153","https://openalex.org/W2963626623","https://openalex.org/W2963735856","https://openalex.org/W2963840853","https://openalex.org/W2963846885","https://openalex.org/W2963854535","https://openalex.org/W2963936013","https://openalex.org/W2964350399","https://openalex.org/W2985891137","https://openalex.org/W2997998901","https://openalex.org/W3044379940","https://openalex.org/W3099554308","https://openalex.org/W3106250896","https://openalex.org/W3109283569","https://openalex.org/W3202858701","https://openalex.org/W4294170691","https://openalex.org/W6637373629","https://openalex.org/W6704559304","https://openalex.org/W6714138976","https://openalex.org/W6730360293","https://openalex.org/W6747323385","https://openalex.org/W6752354721","https://openalex.org/W6755057408","https://openalex.org/W6766481256","https://openalex.org/W6910546390"],"related_works":["https://openalex.org/W4286681602","https://openalex.org/W3209312100","https://openalex.org/W4376620596","https://openalex.org/W3177249605","https://openalex.org/W2534152068","https://openalex.org/W3138508047","https://openalex.org/W1972515067","https://openalex.org/W4293054914","https://openalex.org/W1689909837","https://openalex.org/W4298525700"],"abstract_inverted_index":{"Conventional":[0],"object":[1,29,38,207],"detection":[2,30],"models":[3],"require":[4],"large":[5],"amounts":[6],"of":[7,138,233],"training":[8],"data.":[9],"In":[10,100],"comparison,":[11],"humans":[12],"can":[13,92],"recognize":[14,34],"previously":[15],"unseen":[16,70,90],"objects":[17,91],"by":[18,40,179],"merely":[19],"knowing":[20],"their":[21,43,97],"semantic":[22,44,58,85,98,129,149,188],"description.":[23],"To":[24],"mimic":[25],"similar":[26],"behavior,":[27],"zero-shot":[28],"(ZSD)":[31],"aims":[32],"to":[33,51,68,76,141,165,190],"and":[35,57,84,148,162,172,203,221,238],"localize":[36],"\"unseen\"":[37],"instances":[39],"using":[41,95],"only":[42,96],"information.":[45],"The":[46],"model":[47],"is":[48,177],"first":[49],"trained":[50],"learn":[52],"the":[53,65,77,89,122,127,152,158,214,234],"relationships":[54],"between":[55,82,146,160,169],"visual":[56,83,147,204],"domains":[59],"for":[60,79,117],"seen":[61],"objects,":[62],"later":[63],"transferring":[64],"acquired":[66],"knowledge":[67],"totally":[69],"objects.":[71,174],"This":[72],"setting":[73],"gives":[74],"rise":[75],"need":[78],"correct":[80,114],"alignment":[81,116],"concepts":[86,140,200],"so":[87],"that":[88,112,185],"be":[93,191],"identified":[94],"attributes.":[99],"this":[101],"article,":[102],"we":[103],"propose":[104],"a":[105,135,143],"novel":[106],"loss":[107],"function":[108],"called":[109],"\"polarity":[110],"loss\"":[111],"promotes":[113],"visual-semantic":[115],"an":[118],"improved":[119],"ZSD.":[120],"On":[121,151],"one":[123],"hand,":[124,154],"it":[125,155],"refines":[126],"noisy":[128],"embeddings":[130],"via":[131],"metric":[132],"learning":[133],"on":[134,213],"\"semantic":[136],"vocabulary\"":[137],"related":[139,198],"establish":[142],"better":[144,167],"synergy":[145],"domains.":[150],"other":[153],"explicitly":[156],"maximizes":[157],"gap":[159],"positive":[161],"negative":[163],"predictions":[164],"achieve":[166],"discrimination":[168],"seen,":[170],"unseen,":[171],"background":[173],"Our":[175,236],"approach":[176],"inspired":[178],"embodiment":[180],"theories":[181],"in":[182,193,218],"cognitive":[183],"science":[184],"claim":[186],"human":[187],"understanding":[189],"grounded":[192],"past":[194],"experiences":[195],"(seen":[196],"objects),":[197],"linguistic":[199],"(word":[201],"vocabulary),":[202],"perception":[205],"(seen/unseen":[206],"images).":[208],"We":[209],"conduct":[210],"extensive":[211],"evaluations":[212],"Microsoft":[215],"Common":[216],"Objects":[217],"Context":[219],"(MS-COCO)":[220],"Pascal":[222],"Visual":[223],"Object":[224],"Classes":[225],"(VOC)":[226],"datasets,":[227],"showing":[228],"significant":[229],"improvements":[230],"over":[231],"state":[232],"art.":[235],"code":[237],"evaluation":[239],"protocols":[240],"available":[241],"at:":[242],"https://github.com/salman-h-khan/PL-ZSD_Release.":[243]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5}],"updated_date":"2026-03-16T09:10:04.655348","created_date":"2025-10-10T00:00:00"}
