{"id":"https://openalex.org/W4415708680","doi":"https://doi.org/10.1109/icme59968.2025.11208917","title":"Towards Improved Deep Metric Learning via Unsupervised Object Location","display_name":"Towards Improved Deep Metric Learning via Unsupervised Object Location","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708680","doi":"https://doi.org/10.1109/icme59968.2025.11208917"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11208917","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11208917","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038428900","display_name":"Changxin Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changxin Ye","raw_affiliation_strings":["University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100753373","display_name":"Yushan Zhang","orcid":"https://orcid.org/0000-0002-8366-9708"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yushan Zhang","raw_affiliation_strings":["University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101694146","display_name":"Xinyi Xu","orcid":"https://orcid.org/0000-0002-4776-2809"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyi Xu","raw_affiliation_strings":["University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010340533","display_name":"Wei Huangfu","orcid":"https://orcid.org/0000-0003-2887-8395"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Huangfu","raw_affiliation_strings":["University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology Beijing,School of Computer and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015874725","display_name":"Cheng Deng","orcid":"https://orcid.org/0000-0003-2620-3247"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Deng","raw_affiliation_strings":["Xidian University,School of Electronic Engineering,Xi&#x2019;an,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xidian University,School of Electronic Engineering,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I149594827"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27246789,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.1850000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.1850000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.17710000276565552,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.16220000386238098,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.7400000095367432},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.6571999788284302},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5782999992370605},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.574999988079071},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.574400007724762},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.5680999755859375},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5670999884605408},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.544700026512146}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7975999712944031},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.7400000095367432},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7236999869346619},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.6571999788284302},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5782999992370605},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.574999988079071},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.574400007724762},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.5680999755859375},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5670999884605408},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.544700026512146},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5030999779701233},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48170000314712524},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.4072999954223633},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.37790000438690186},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.37689998745918274},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.36739999055862427},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3596999943256378},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3386000096797943},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2822999954223633},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.27790001034736633},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11208917","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11208917","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2096733369","https://openalex.org/W2138011018","https://openalex.org/W2138621090","https://openalex.org/W2157364932","https://openalex.org/W2194775991","https://openalex.org/W2471768434","https://openalex.org/W2549858646","https://openalex.org/W2605102252","https://openalex.org/W2895347732","https://openalex.org/W2948077755","https://openalex.org/W2948638722","https://openalex.org/W2962723992","https://openalex.org/W2962898354","https://openalex.org/W2963026686","https://openalex.org/W2963350250","https://openalex.org/W2964271799","https://openalex.org/W2969656782","https://openalex.org/W2969985801","https://openalex.org/W2976818183","https://openalex.org/W2991234496","https://openalex.org/W2997997511","https://openalex.org/W3034202663","https://openalex.org/W3034456293","https://openalex.org/W3097096317","https://openalex.org/W3101432304","https://openalex.org/W3106778652","https://openalex.org/W3161599138","https://openalex.org/W4213341555","https://openalex.org/W4214609630","https://openalex.org/W4376274832","https://openalex.org/W4382395746","https://openalex.org/W4385245566","https://openalex.org/W4400188121"],"related_works":[],"abstract_inverted_index":{"Deep":[0,61],"Metric":[1,62],"Learning":[2,63],"(DML)":[3],"aims":[4],"at":[5],"learning":[6],"the":[7,44,55,76,79,93,97,101,106,118,125,129,142,148,158,162],"representation":[8],"of":[9,78],"fine-grained":[10,18],"data,":[11],"which":[12,89],"plays":[13],"a":[14,72,87],"vital":[15],"role":[16],"in":[17],"retrieval":[19],"and":[20,91,141],"classification":[21],"applications.":[22],"Previous":[23],"studies":[24],"have":[25],"shown":[26],"that":[27,145,154],"cropping":[28],"an":[29],"object":[30],"based":[31,95],"on":[32,82,96,137],"its":[33],"bounding":[34],"box":[35],"(BBox)":[36],"can":[37,156],"significantly":[38],"enhance":[39],"performance.":[40],"However,":[41],"manually":[42],"annotating":[43],"BBox":[45,56,77,130,159,163],"is":[46,108],"costly.":[47],"In":[48],"this":[49],"paper,":[50],"we":[51,152],"propose":[52],"to":[53,74,110,120],"predict":[54,75,157],"unsupervisedly,":[57],"termed":[58],"Towards":[59],"Improved":[60],"via":[64],"Unsupervised":[65],"Object":[66],"Location":[67],"(DML-OL).":[68],"DML-OL":[69,84,136,146,155],"first":[70],"proposes":[71],"BBoxNN":[73],"object.":[80],"Building":[81],"this,":[83],"further":[85],"introduces":[86],"CRNN,":[88],"crops":[90],"resizes":[92],"image":[94],"predicted":[98],"BBox.":[99],"Unlike":[100],"non-differentiable":[102],"naive":[103],"crop":[104],"operator,":[105],"CRNN":[107],"designed":[109],"be":[111,121],"fully":[112],"differentiable.":[113],"This":[114],"differentiability":[115],"property":[116],"enables":[117],"model":[119],"trained":[122],"end-to-end":[123],"using":[124],"pretext":[126],"task":[127],"without":[128,161],"labels.":[131,164],"We":[132],"evaluate":[133],"our":[134],"proposed":[135],"two":[138],"strong":[139],"baselines,":[140],"results":[143],"show":[144],"outperforms":[147],"compared":[149],"methods.":[150],"Additionally,":[151],"demonstrate":[153],"accurately":[160]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-30T00:00:00"}
