{"id":"https://openalex.org/W4404612032","doi":"https://doi.org/10.1145/3678717.3691296","title":"Towards Zero-Shot Annotation of the Built Environment with Vision-Language Models","display_name":"Towards Zero-Shot Annotation of the Built Environment with Vision-Language Models","publication_year":2024,"publication_date":"2024-10-29","ids":{"openalex":"https://openalex.org/W4404612032","doi":"https://doi.org/10.1145/3678717.3691296"},"language":"en","primary_location":{"id":"doi:10.1145/3678717.3691296","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3678717.3691296","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Advances in Geographic Information Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3678717.3691296","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102741786","display_name":"Bin Han","orcid":"https://orcid.org/0000-0002-5280-9456"},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bin Han","raw_affiliation_strings":["University of Washington, Seattle, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, USA","institution_ids":["https://openalex.org/I201448701","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101822453","display_name":"Yiwei Yang","orcid":"https://orcid.org/0009-0008-0136-6465"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiwei Yang","raw_affiliation_strings":["University of Washington, Seattle, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, USA","institution_ids":["https://openalex.org/I201448701","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108649570","display_name":"Anat Caspi","orcid":"https://orcid.org/0000-0003-0864-0734"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anat Caspi","raw_affiliation_strings":["University of Washington, Seattle, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, USA","institution_ids":["https://openalex.org/I201448701","https://openalex.org/I58610484"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007124763","display_name":"Bill Howe","orcid":"https://orcid.org/0000-0001-8588-8472"},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bill Howe","raw_affiliation_strings":["University of Washington, Seattle, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, USA","institution_ids":["https://openalex.org/I201448701","https://openalex.org/I58610484"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102741786"],"corresponding_institution_ids":["https://openalex.org/I201448701","https://openalex.org/I58610484"],"apc_list":null,"apc_paid":null,"fwci":0.8742,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.81814094,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"601","last_page":"604"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9606000185012817,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11606","display_name":"Infrastructure Maintenance and Monitoring","score":0.9516000151634216,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7251158952713013},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6632251739501953},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.6337846517562866},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6127867698669434},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5127052068710327},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4145287275314331},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3652433156967163},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.34154197573661804},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.15851178765296936}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7251158952713013},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6632251739501953},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.6337846517562866},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6127867698669434},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5127052068710327},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4145287275314331},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3652433156967163},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.34154197573661804},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15851178765296936},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3678717.3691296","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3678717.3691296","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Advances in Geographic Information Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3678717.3691296","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3678717.3691296","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Advances in Geographic Information Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.49000000953674316,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2483872464","https://openalex.org/W2891529090","https://openalex.org/W2981387476","https://openalex.org/W3195958646","https://openalex.org/W4224244612","https://openalex.org/W4307136819","https://openalex.org/W4309181071","https://openalex.org/W4313145013","https://openalex.org/W4322707256","https://openalex.org/W4390873481","https://openalex.org/W4390874575","https://openalex.org/W4392397297","https://openalex.org/W4394564126","https://openalex.org/W6863972698"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W2361861616","https://openalex.org/W4214877189","https://openalex.org/W2263699433","https://openalex.org/W2773965352","https://openalex.org/W2377979023","https://openalex.org/W2218034408","https://openalex.org/W2392921965","https://openalex.org/W2381179799","https://openalex.org/W2980279061"],"abstract_inverted_index":{"Equitable":[0],"urban":[1,52,111],"transportation":[2],"applications":[3],"require":[4,32],"high-fidelity":[5],"digital":[6],"representations":[7],"of":[8,149],"the":[9,58,127,150],"built":[10,82,151],"environment":[11,83,152],"(streets,":[12],"crossings,":[13],"curb":[14],"ramps":[15],"and":[16,20,98,116,129,157,161],"more).":[17],"Direct":[18],"inspections":[19],"manual":[21],"annotations":[22],"are":[23],"costly":[24],"at":[25,67,159],"scale,":[26],"while":[27,122],"conventional":[28],"machine":[29],"learning":[30],"methods":[31],"substantial":[33],"annotated":[34],"training":[35,75],"data":[36],"for":[37,49,80],"adequate":[38],"performance.":[39],"This":[40],"study":[41],"explores":[42],"vision":[43,95],"language":[44,96],"models":[45,65],"as":[46],"a":[47,91,94,99],"tool":[48],"annotating":[50],"diverse":[51,163],"features":[53,112],"from":[54],"satellite":[55],"images,":[56,73],"reducing":[57],"dependence":[59],"on":[60,109],"human":[61],"annotation.":[62],"Although":[63],"these":[64,141],"excel":[66],"describing":[68],"common":[69],"objects":[70],"in":[71,146,162],"human-centric":[72],"their":[74,86],"sets":[76],"may":[77],"lack":[78],"signals":[79],"esoteric":[81],"features,":[84],"making":[85],"performance":[87],"uncertain.":[88],"We":[89,138],"demonstrate":[90],"proof-of-concept":[92],"using":[93],"model":[97],"visual":[100,130],"prompting":[101,124,131],"strategy":[102],"that":[103,121],"considers":[104],"segmented":[105],"image":[106],"elements.":[107],"Experiments":[108],"two":[110],"---":[113,119],"stop":[114],"lines":[115],"raised":[117],"tables":[118],"show":[120],"zero-shot":[123],"rarely":[125],"works,":[126],"segmentation":[128],"strategies":[132],"achieve":[133],"nearly":[134],"40%":[135],"intersection-over-union":[136],"accuracy.":[137],"describe":[139],"how":[140],"results":[142],"motivate":[143],"further":[144],"research":[145],"automatic":[147],"annotation":[148],"to":[153],"improve":[154],"equity,":[155],"accessibility,":[156],"safety":[158],"scale":[160],"environments.":[164]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
