{"id":"https://openalex.org/W4416750563","doi":"https://doi.org/10.1109/iros60139.2025.11247667","title":"PAVLM: Advancing Point Cloud based Affordance Understanding Via Vision-Language Model","display_name":"PAVLM: Advancing Point Cloud based Affordance Understanding Via Vision-Language Model","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416750563","doi":"https://doi.org/10.1109/iros60139.2025.11247667"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11247667","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059241924","display_name":"Shang-Ching Liu","orcid":"https://orcid.org/0009-0001-3484-8646"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Shang-Ching Liu","raw_affiliation_strings":["University of Hamburg,Technical Aspects of Multimodal Systems (TAMS),Department of Informatics"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Technical Aspects of Multimodal Systems (TAMS),Department of Informatics","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030285894","display_name":"Van Nhiem Tran","orcid":"https://orcid.org/0000-0001-6941-0348"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Van Nhiem Tran","raw_affiliation_strings":["Hon Hai Research Institute (HHRI)"],"affiliations":[{"raw_affiliation_string":"Hon Hai Research Institute (HHRI)","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101542024","display_name":"Wenkai Chen","orcid":"https://orcid.org/0000-0003-0169-8896"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Wenkai Chen","raw_affiliation_strings":["University of Hamburg,Technical Aspects of Multimodal Systems (TAMS),Department of Informatics"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Technical Aspects of Multimodal Systems (TAMS),Department of Informatics","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101088254","display_name":"Wei-Lun Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wei-Lun Cheng","raw_affiliation_strings":["National Taiwan University,Department of Electrical Engineering"],"affiliations":[{"raw_affiliation_string":"National Taiwan University,Department of Electrical Engineering","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103540516","display_name":"Yen\u2010Lin Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yen-Lin Huang","raw_affiliation_strings":["National Tsinghua University,Department of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"National Tsinghua University,Department of Computer Science and Technology","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081460934","display_name":"I-Bin Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"I-Bin Liao","raw_affiliation_strings":["Hon Hai Research Institute (HHRI)"],"affiliations":[{"raw_affiliation_string":"Hon Hai Research Institute (HHRI)","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048205934","display_name":"Yung\u2010Hui Li","orcid":"https://orcid.org/0000-0002-0475-3689"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yung-Hui Li","raw_affiliation_strings":["Hon Hai Research Institute (HHRI)"],"affiliations":[{"raw_affiliation_string":"Hon Hai Research Institute (HHRI)","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100326977","display_name":"Jianwei Zhang","orcid":"https://orcid.org/0000-0003-2728-3357"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jianwei Zhang","raw_affiliation_strings":["University of Hamburg,Technical Aspects of Multimodal Systems (TAMS),Department of Informatics"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Technical Aspects of Multimodal Systems (TAMS),Department of Informatics","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5059241924"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37140594,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4299","last_page":"4306"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.4828000068664551,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.4828000068664551,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.23819999396800995,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.06030000001192093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/affordance","display_name":"Affordance","score":0.9624000191688538},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5934000015258789},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5641999840736389},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5194000005722046},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.5177000164985657},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.5099999904632568},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5069000124931335},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.4253999888896942}],"concepts":[{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.9624000191688538},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7387999892234802},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.617900013923645},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5934000015258789},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5641999840736389},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5194000005722046},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.5177000164985657},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.5099999904632568},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5069000124931335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48919999599456787},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.4253999888896942},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.42500001192092896},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.42410001158714294},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3736000061035156},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.33889999985694885},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.33320000767707825},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3215999901294708},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11247667","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2561523096","https://openalex.org/W2889895098","https://openalex.org/W2914996824","https://openalex.org/W2962984928","https://openalex.org/W3153149165","https://openalex.org/W3175450634","https://openalex.org/W4283788863","https://openalex.org/W4312788538","https://openalex.org/W4312818263","https://openalex.org/W4313003756","https://openalex.org/W4383097638","https://openalex.org/W4386071707","https://openalex.org/W4386076097","https://openalex.org/W4390874324","https://openalex.org/W4396782959","https://openalex.org/W4401414292","https://openalex.org/W4402702986","https://openalex.org/W4402727761","https://openalex.org/W4402916210","https://openalex.org/W4402961666","https://openalex.org/W4405786528"],"related_works":[],"abstract_inverted_index":{"Affordance":[0,67],"understanding,":[1],"the":[2,25,50,75,115,128,138],"task":[3],"of":[4,89,164],"identifying":[5],"actionable":[6],"regions":[7],"on":[8,137],"3D":[9,86,165],"objects,":[10],"plays":[11],"a":[12,98],"vital":[13],"role":[14],"in":[15,35,48,80,156],"allowing":[16],"robotic":[17,42],"systems":[18],"to":[19,84,96,110,122,159],"engage":[20],"with":[21,102,131],"and":[22,38,150],"operate":[23],"within":[24],"physical":[26,52],"world.":[27],"Although":[28],"Visual":[29],"Language":[30],"Models":[31],"(VLMs)":[32],"have":[33],"excelled":[34],"high-level":[36],"reasoning":[37],"long-horizon":[39],"planning":[40],"for":[41,55,147],"manipulation,":[43],"they":[44],"still":[45],"fall":[46],"short":[47],"grasping":[49],"nuanced":[51],"properties":[53],"required":[54],"effective":[56],"human-robot":[57],"interaction.":[58],"In":[59],"this":[60],"paper,":[61],"we":[62,118],"introduce":[63],"PAVLM":[64,92,143],"(Point":[65],"cloud":[66],"Vision-Language":[68],"Model),":[69],"an":[70,94],"innovative":[71],"framework":[72],"that":[73,142],"utilizes":[74],"extensive":[76],"multimodal":[77],"knowledge":[78],"embedded":[79],"pre-trained":[81],"language":[82,107,116],"models":[83,108,121],"enhance":[85],"affordance":[87,162],"understanding":[88],"point":[90,152],"cloud.":[91],"is":[93],"approach":[95],"integrates":[97],"geometric-guided":[99],"propagation":[100],"module":[101],"hidden":[103],"embeddings":[104],"from":[105],"large":[106],"(LLMs)":[109],"enrich":[111],"visual":[112],"semantics.":[113],"On":[114],"side,":[117],"prompt":[119],"Llama-3.1":[120],"generate":[123],"refined":[124],"context-aware":[125],"text,":[126],"augmenting":[127],"instructional":[129],"input":[130],"deeper":[132],"semantic":[133],"cues.":[134],"Experimental":[135],"results":[136],"3D-AffordanceNet":[139],"benchmark":[140],"demonstrate":[141],"outperforms":[144],"baseline":[145],"methods":[146],"both":[148],"full":[149],"partial":[151],"clouds,":[153],"particularly":[154],"excelling":[155],"its":[157],"generalization":[158],"novel":[160],"open-world":[161],"tasks":[163],"objects.":[166],"For":[167],"more":[168],"information,":[169],"visit":[170],"our":[171],"project":[172],"site:":[173],"pavlm-source.github.io.":[174]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
