{"id":"https://openalex.org/W7162572827","doi":"https://doi.org/10.48550/arxiv.2605.26689","title":"PinPoint: Prompting with Informative Interior Points","display_name":"PinPoint: Prompting with Informative Interior Points","publication_year":2026,"publication_date":"2026-05-26","ids":{"openalex":"https://openalex.org/W7162572827","doi":"https://doi.org/10.48550/arxiv.2605.26689"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.26689","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26689","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.26689","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137092371","display_name":"Pouya Sadeghi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sadeghi, Pouya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137123022","display_name":"Shawn He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Shawn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137188806","display_name":"Pedro Pablo Guerrero Vela","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vela, Pedro Pablo Guerrero","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137173430","display_name":"C. Thomas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas, C.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137100494","display_name":"Alex Wong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wong, Alex","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5018625427","display_name":"Sirisha Rambhatla","orcid":"https://orcid.org/0000-0002-9389-727X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rambhatla, Sirisha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9332000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9332000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.008200000040233135,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.006500000134110451,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.6341999769210815},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.6187000274658203},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5333999991416931},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.522599995136261},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5164999961853027},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.46889999508857727},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4138000011444092},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4059999883174896},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.396699994802475}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.651199996471405},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.6341999769210815},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.6187000274658203},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5591999888420105},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5333999991416931},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.522599995136261},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5164999961853027},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.46889999508857727},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4636000096797943},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4138000011444092},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4059999883174896},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.396699994802475},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.37450000643730164},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.37310001254081726},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.36570000648498535},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3603000044822693},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3449000120162964},{"id":"https://openalex.org/C2781415353","wikidata":"https://www.wikidata.org/wiki/Q5196602","display_name":"Cut-point","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C198352243","wikidata":"https://www.wikidata.org/wiki/Q37105","display_name":"Line (geometry)","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2727999985218048},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26989999413490295},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.26409998536109924},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.257099986076355},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.26689","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26689","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.26689","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26689","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modern":[0],"referring":[1],"image":[2],"segmentation":[3],"pipelines":[4],"couple":[5],"a":[6,13,67,133,147,181,192],"vision-language":[7],"model":[8,173],"(VLM)":[9],"for":[10,23],"grounding":[11],"with":[12,157,171],"promptable":[14],"segmenter":[15],"such":[16],"as":[17],"the":[18,45,49,55,60,79,83,85,91,121,205,223],"Segment":[19],"Anything":[20],"Model":[21],"(SAM)":[22],"mask":[24],"generation.":[25],"Prior":[26],"training-free":[27,183],"instances":[28],"of":[29,150],"this":[30,129,142,177],"recipe":[31],"consistently":[32],"trail":[33],"fine-tuned":[34],"and":[35,39,111,114,125,203,219],"reinforcement-learning":[36],"(RL)-tuned":[37],"specialists,":[38],"it":[40],"has":[41],"been":[42],"unclear":[43],"whether":[44],"gap":[46,61,130],"comes":[47],"from":[48,201],"VLM's":[50],"grounding,":[51],"SAM's":[52],"capacity,":[53],"or":[54],"prompt.":[56],"We":[57,175],"show":[58,140],"that":[59,106,141,186],"is":[62,144],"dominated":[63],"by":[64,131,166],"prompt":[65],"ambiguity:":[66],"VLM-proposed":[68],"bounding":[69],"box":[70],"(bbox)":[71],"leaves":[72],"SAM":[73],"to":[74,82,120,135,208],"guess":[75],"which":[76],"pixels":[77],"inside":[78],"bbox":[80,122],"belong":[81],"object":[84],"expression":[86],"denotes.":[87],"Interior":[88],"points":[89,105,168,199],"are":[90],"natural":[92],"disambiguator,":[93],"but":[94],"where":[95],"they":[96],"fall":[97],"matters;":[98],"prior":[99],"work":[100],"relies":[101],"on":[102,108,222],"naively":[103],"sampled":[104],"land":[107],"boundaries,":[109,202],"distractors,":[110],"background":[112],"clutter,":[113],"can":[115],"even":[116],"hurt":[117],"performance":[118],"compared":[119],"alone.":[123],"Supervised":[124],"RL-tuned":[126,220],"methods":[127],"close":[128],"training":[132,143],"VLM":[134,207,230],"predict":[136],"better":[137],"points;":[138],"we":[139],"unnecessary.":[145],"At":[146],"matched":[148],"budget":[149],"five":[151],"interior":[152],"points,":[153],"replacing":[154],"naive":[155],"sampling":[156],"stable,":[158],"informative":[159],"point":[160,184],"selection":[161],"improves":[162],"cumulative":[163],"Intersection-over-Union":[164],"(cIoU)":[165],"12-18":[167],"across":[169],"RefCOCO/+/g,":[170],"every":[172],"fixed.":[174],"turn":[176],"observation":[178],"into":[179,191],"PinPoint,":[180],"deterministic,":[182],"selector":[185],"fuses":[187],"four":[188],"visual":[189],"cues":[190],"consensus":[193],"map,":[194],"selects":[195],"compact,":[196],"spatially":[197],"diverse":[198],"away":[200],"uses":[204],"frozen":[206],"label":[209],"each":[210],"point.":[211],"Without":[212],"any":[213],"task-specific":[214],"training,":[215],"PinPoint":[216],"matches":[217],"supervised":[218],"specialists":[221],"same":[224],"stack":[225],"while":[226],"issuing":[227],"only":[228],"two":[229],"calls":[231],"per":[232],"query.":[233]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-28T00:00:00"}
