{"id":"https://openalex.org/W7117488643","doi":"https://doi.org/10.1109/dicta68720.2025.11302434","title":"Split-Fuse-Transport: Annotation-Free Saliency via Dual Clustering and Optimal Transport Alignment","display_name":"Split-Fuse-Transport: Annotation-Free Saliency via Dual Clustering and Optimal Transport Alignment","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W7117488643","doi":"https://doi.org/10.1109/dicta68720.2025.11302434"},"language":null,"primary_location":{"id":"doi:10.1109/dicta68720.2025.11302434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035818012","display_name":"Muhammad Umer Ramzan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135941","display_name":"GIFT University","ror":"https://ror.org/04tb22w36","country_code":"PK","type":"education","lineage":["https://openalex.org/I4210135941"]}],"countries":["PK"],"is_corresponding":true,"raw_author_name":"Muhammad Umer Ramzan","raw_affiliation_strings":["GIFT University,Pakistan"],"affiliations":[{"raw_affiliation_string":"GIFT University,Pakistan","institution_ids":["https://openalex.org/I4210135941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121443991","display_name":"Ali Zia","orcid":null},"institutions":[{"id":"https://openalex.org/I196829312","display_name":"La Trobe University","ror":"https://ror.org/01rxfrp27","country_code":"AU","type":"education","lineage":["https://openalex.org/I196829312"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ali Zia","raw_affiliation_strings":["La Trobe University,Australia"],"affiliations":[{"raw_affiliation_string":"La Trobe University,Australia","institution_ids":["https://openalex.org/I196829312"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054763664","display_name":"Abdelwahed Khamis","orcid":"https://orcid.org/0000-0002-3475-3479"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Abdelwahed Khamis","raw_affiliation_strings":["CSIRO,Australia"],"affiliations":[{"raw_affiliation_string":"CSIRO,Australia","institution_ids":["https://openalex.org/I1292875679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121487747","display_name":"Noman Ali","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135941","display_name":"GIFT University","ror":"https://ror.org/04tb22w36","country_code":"PK","type":"education","lineage":["https://openalex.org/I4210135941"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Noman Ali","raw_affiliation_strings":["GIFT University,Pakistan"],"affiliations":[{"raw_affiliation_string":"GIFT University,Pakistan","institution_ids":["https://openalex.org/I4210135941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121465252","display_name":"Usman Ali","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135941","display_name":"GIFT University","ror":"https://ror.org/04tb22w36","country_code":"PK","type":"education","lineage":["https://openalex.org/I4210135941"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Usman Ali","raw_affiliation_strings":["GIFT University,Pakistan"],"affiliations":[{"raw_affiliation_string":"GIFT University,Pakistan","institution_ids":["https://openalex.org/I4210135941"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017910586","display_name":"Wei Xiang","orcid":"https://orcid.org/0009-0008-6169-1497"},"institutions":[{"id":"https://openalex.org/I196829312","display_name":"La Trobe University","ror":"https://ror.org/01rxfrp27","country_code":"AU","type":"education","lineage":["https://openalex.org/I196829312"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wei Xiang","raw_affiliation_strings":["La Trobe University,Australia"],"affiliations":[{"raw_affiliation_string":"La Trobe University,Australia","institution_ids":["https://openalex.org/I196829312"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5035818012"],"corresponding_institution_ids":["https://openalex.org/I4210135941"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.66180396,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0010000000474974513,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.0003000000142492354,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.7749999761581421},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5855000019073486},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.5519000291824341},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5095000267028809},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5081999897956848},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5062999725341797},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4593999981880188},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4537000060081482},{"id":"https://openalex.org/keywords/voting","display_name":"Voting","score":0.40209999680519104},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.40049999952316284}],"concepts":[{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.7749999761581421},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7196000218391418},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7020000219345093},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5855000019073486},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.5519000291824341},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5081999897956848},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5062999725341797},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.48559999465942383},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4593999981880188},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4537000060081482},{"id":"https://openalex.org/C520049643","wikidata":"https://www.wikidata.org/wiki/Q189760","display_name":"Voting","level":3,"score":0.40209999680519104},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.40049999952316284},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.39469999074935913},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.35530000925064087},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C198352243","wikidata":"https://www.wikidata.org/wiki/Q37105","display_name":"Line (geometry)","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2842000126838684},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C2778775528","wikidata":"https://www.wikidata.org/wiki/Q5135432","display_name":"Closing (real estate)","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C105611402","wikidata":"https://www.wikidata.org/wiki/Q2976589","display_name":"Spectral clustering","level":3,"score":0.2727999985218048},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C182124507","wikidata":"https://www.wikidata.org/wiki/Q166154","display_name":"Line segment","level":2,"score":0.2513999938964844},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dicta68720.2025.11302434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2121947440","https://openalex.org/W2295107390","https://openalex.org/W2412782625","https://openalex.org/W2552414813","https://openalex.org/W2569272946","https://openalex.org/W2724892359","https://openalex.org/W2740667773","https://openalex.org/W2777511827","https://openalex.org/W2961348656","https://openalex.org/W2962680827","https://openalex.org/W2963020481","https://openalex.org/W2963136160","https://openalex.org/W2963685207","https://openalex.org/W2963906836","https://openalex.org/W2990984982","https://openalex.org/W3025800305","https://openalex.org/W3030079400","https://openalex.org/W3034453930","https://openalex.org/W3094502228","https://openalex.org/W3098589008","https://openalex.org/W3107944836","https://openalex.org/W3108318504","https://openalex.org/W3113755791","https://openalex.org/W3200819120","https://openalex.org/W3204446219","https://openalex.org/W4214561053","https://openalex.org/W4221161778","https://openalex.org/W4221167012","https://openalex.org/W4226216113","https://openalex.org/W4294255647","https://openalex.org/W4313025803","https://openalex.org/W4366976290","https://openalex.org/W4385245566","https://openalex.org/W4393147872","https://openalex.org/W4402753749","https://openalex.org/W4407475717","https://openalex.org/W4411244723","https://openalex.org/W4412186558","https://openalex.org/W4413144663"],"related_works":[],"abstract_inverted_index":{"Salient":[0],"object":[1],"detection":[2],"(SOD)":[3],"aims":[4],"to":[5,152,184,192,200],"segment":[6],"visually":[7],"prominent":[8],"regions":[9],"in":[10,135,194],"images":[11],"and":[12,51,59,118,168,186],"serves":[13],"as":[14],"a":[15,33,136,146],"foundational":[16],"task":[17],"for":[18],"various":[19],"computer":[20],"vision":[21],"applications.":[22],"We":[23,44],"posit":[24],"that":[25,94,159,177],"SOD":[26,157],"can":[27],"now":[28],"reach":[29],"near-supervised":[30],"accuracy":[31,167],"without":[32,140],"single":[34,97,137],"pixel-level":[35],"label,":[36],"but":[37],"only":[38],"when":[39],"reliable":[40],"pseudo-masks":[41,134],"are":[42,109,123],"available.":[43],"revisit":[45],"the":[46,67,119,198],"prototype-based":[47],"line":[48],"of":[49,90],"work":[50],"make":[52],"two":[53,120],"key":[54],"observations.":[55],"First,":[56],"boundary":[57],"pixels":[58,61,108,115],"interior":[60],"obey":[62],"markedly":[63],"different":[64],"geometry,":[65],"second,":[66],"global":[68],"consistency":[69],"enforced":[70],"by":[71,111,116,126,182,190],"optimal":[72],"transport":[73],"(OT)":[74],"is":[75,80],"underutilised":[76],"if":[77],"prototype":[78,121],"quality":[79],"weak.":[81],"To":[82],"address":[83],"this,":[84],"we":[85],"introduce":[86],"POTNet,":[87],"an":[88,103,154],"adaptation":[89],"Prototypical":[91],"Optimal":[92],"Transport":[93],"replaces":[95],"POT's":[96],"<tex":[98],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[99],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$k$</tex>-means":[100],"step":[101],"with":[102],"entropy-guided":[104],"dual-clustering":[105],"head:":[106],"high-entropy":[107],"organised":[110],"spectral":[112],"clustering,":[113],"low-entropy":[114],"k-means,":[117],"sets":[122],"subsequently":[124],"aligned":[125],"OT.":[127],"This":[128],"split-fuse-transport":[129],"design":[130],"yields":[131],"sharper,":[132],"part-aware":[133],"forward":[138],"pass,":[139],"handcrafted":[141],"priors.":[142],"Those":[143],"masks":[144],"supervise":[145],"standard":[147],"MaskFormer-style":[148],"encoder-decoder,":[149],"giving":[150],"rise":[151],"AutoSOD,":[153],"end-to-end":[155],"unsupervised":[156,180],"pipeline":[158],"eliminates":[160],"SelfMask's":[161],"offline":[162],"voting":[163],"yet":[164],"improves":[165],"both":[166],"training":[169],"efficiency.":[170],"Extensive":[171],"experiments":[172],"on":[173],"five":[174],"benchmarks":[175],"show":[176],"AutoSOD":[178],"outperforms":[179],"methods":[181,189],"up":[183,191],"26%":[185],"weakly":[187],"supervised":[188,202],"36%":[193],"F-measure,":[195],"further":[196],"narrowing":[197],"gap":[199],"fully":[201],"models.":[203]},"counts_by_year":[],"updated_date":"2025-12-30T23:08:21.542490","created_date":"2025-12-29T00:00:00"}
