{"id":"https://openalex.org/W4414876956","doi":"https://doi.org/10.1109/iccv51701.2025.02194","title":"Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss","display_name":"Prompt Guidance and Human Proximal Perception for HOT Prediction with Regional Joint Loss","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4414876956","doi":"https://doi.org/10.1109/iccv51701.2025.02194"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.01630","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100721978","display_name":"Yuxiao Wang","orcid":"https://orcid.org/0009-0001-7359-5231"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxiao Wang","raw_affiliation_strings":["South China University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085349645","display_name":"Lei Yu","orcid":"https://orcid.org/0000-0001-9245-6248"},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Lei","raw_affiliation_strings":["Southwest Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southwest Jiaotong University","institution_ids":["https://openalex.org/I4800084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054252180","display_name":"Zhenao Wei","orcid":"https://orcid.org/0000-0002-6541-1287"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenao Wei","raw_affiliation_strings":["South China University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100312434","display_name":"Weiying Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiying Xue","raw_affiliation_strings":["South China University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002810785","display_name":"Xinyu Jiang","orcid":"https://orcid.org/0000-0002-5066-1259"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Jiang","raw_affiliation_strings":["South China University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081705021","display_name":"Nan Zhuang","orcid":"https://orcid.org/0009-0006-5023-2661"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nan Zhuang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100453156","display_name":"Qi Liu","orcid":"https://orcid.org/0000-0001-6956-5550"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Liu","raw_affiliation_strings":["South China University of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"South China University of Technology","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100721978"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30504983,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"23636","last_page":"23645"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10050","display_name":"Multi-Criteria Decision Making","score":0.7900999784469604,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10050","display_name":"Multi-Criteria Decision Making","score":0.7900999784469604,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.7612000107765198,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10427","display_name":"Visual perception and processing mechanisms","score":0.7161999940872192,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6352999806404114},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6182000041007996},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6044999957084656},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.585099995136261},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5669000148773193},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5291000008583069},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5170999765396118}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6668000221252441},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6352999806404114},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6182000041007996},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6044999957084656},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.585099995136261},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5703999996185303},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5669000148773193},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5291000008583069},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5170999765396118},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.477400004863739},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4404999911785126},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4242999851703644},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.41609999537467957},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3950999975204468},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34940001368522644},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34290000796318054},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3075999915599823},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.2856999933719635},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2816999852657318}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02194","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2507.01630","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01630","pdf_url":"https://arxiv.org/pdf/2507.01630","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2507.01630","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.01630","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.01630","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01630","pdf_url":"https://arxiv.org/pdf/2507.01630","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6653062578","display_name":null,"funder_award_id":"62202174","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,226],"task":[1],"of":[2,12,29,130,178,208],"Human-Object":[3],"conTact":[4],"(HOT)":[5],"detection":[6],"involves":[7],"identifying":[8],"the":[9,13,81,85,90,111,128,131,163,176,223],"specific":[10,49],"areas":[11,38],"human":[14,67,98],"body":[15],"that":[16,189],"are":[17,23,122,229],"touching":[18],"objects.":[19],"Nevertheless,":[20],"current":[21],"models":[22],"restricted":[24],"to":[25,33,44,79,104,116,158,174],"just":[26],"one":[27],"type":[28],"image,":[30],"often":[31],"leading":[32],"too":[34],"much":[35],"segmentation":[36],"in":[37,137,162,181,195,214],"with":[39],"little":[40],"interaction,":[41],"and":[42,66,94,135,212,218],"struggling":[43],"maintain":[45],"category":[46],"consistency":[47],"within":[48],"regions.":[50],"To":[51,70],"tackle":[52],"this":[53],"issue,":[54],"a":[55,75,97,138,142,146,155],"HOT":[56],"framework,":[57],"termed":[58],"\\textbf{P3HOT},":[59],"is":[60,102,172],"proposed,":[61],"which":[62],"blends":[63],"\\textbf{P}rompt":[64],"guidance":[65],"\\textbf{P}roximal":[68],"\\textbf{P}erception.":[69],"begin":[71],"with,":[72],"we":[73],"utilize":[74],"semantic-driven":[76],"prompt":[77],"mechanism":[78,101],"direct":[80],"network's":[82],"attention":[83],"towards":[84],"relevant":[86],"regions":[87,119],"based":[88],"on":[89,222],"correlation":[91],"between":[92,133],"image":[93],"text.":[95],"Then":[96],"proximal":[99],"perception":[100],"employed":[103],"dynamically":[105],"perceive":[106],"key":[107],"depth":[108,126],"range":[109],"around":[110],"human,":[112],"using":[113],"learnable":[114],"parameters":[115],"effectively":[117],"eliminate":[118],"where":[120],"interactions":[121],"not":[123],"expected.":[124],"Calculating":[125],"resolves":[127],"uncertainty":[129],"overlap":[132],"humans":[134],"objects":[136],"2D":[139],"perspective,":[140],"providing":[141],"quasi-3D":[143],"viewpoint.":[144],"Moreover,":[145],"Regional":[147],"Joint":[148],"Loss":[149],"(RJLoss)":[150],"has":[151],"been":[152],"created":[153],"as":[154],"new":[156,167],"loss":[157],"inhibit":[159],"abnormal":[160],"categories":[161],"same":[164],"area.":[165],"A":[166],"evaluation":[168],"metric":[169],"called":[170],"``AD-Acc.''":[171],"introduced":[173],"address":[175],"shortcomings":[177],"existing":[179],"methods":[180],"addressing":[182],"negative":[183],"samples.":[184],"Comprehensive":[185],"experimental":[186],"results":[187],"demonstrate":[188],"our":[190,203],"approach":[191],"achieves":[192,205],"state-of-the-art":[193],"performance":[194],"four":[196],"metrics":[197],"across":[198],"two":[199],"benchmark":[200],"datasets.":[201],"Specifically,":[202],"model":[204],"an":[206],"improvement":[207],"\\textbf{0.7}$\\uparrow$,":[209],"\\textbf{2.0}$\\uparrow$,":[210],"\\textbf{1.6}$\\uparrow$,":[211],"\\textbf{11.0}$\\uparrow$":[213],"SC-Acc.,":[215],"mIoU,":[216],"wIoU,":[217],"AD-Acc.":[219],"metrics,":[220],"respectively,":[221],"HOT-Annotated":[224],"dataset.":[225],"sources":[227],"code":[228],"available":[230],"at":[231],"https://github.com/YuxiaoWang-AI/P3HOT.":[232]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
