{"id":"https://openalex.org/W7137847064","doi":"https://doi.org/10.1609/aaai.v40i19.38678","title":"Improving Region Representation Learning from Urban Imagery with Noisy Long-Caption Supervision","display_name":"Improving Region Representation Learning from Urban Imagery with Noisy Long-Caption Supervision","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137847064","doi":"https://doi.org/10.1609/aaai.v40i19.38678"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i19.38678","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i19.38678","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38678/42640","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38678/42640","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129728724","display_name":"Yimei Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yimei Zhang","raw_affiliation_strings":["Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129657107","display_name":"Guojiang Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guojiang Shen","raw_affiliation_strings":["Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125707292","display_name":"Kaili Ning","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaili Ning","raw_affiliation_strings":["Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084591510","display_name":"Tongwei Ren","orcid":"https://orcid.org/0000-0003-3092-424X"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tongwei Ren","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129701681","display_name":"Xuebo Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuebo Qiu","raw_affiliation_strings":["Zhejiang University of Technology"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129718947","display_name":"Mengmeng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengmeng Wang","raw_affiliation_strings":["Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129643121","display_name":"Xiangjie Kong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangjie Kong","raw_affiliation_strings":["Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology\nZhejiang Key Laboratory of Visual Information Intelligent Processing","institution_ids":["https://openalex.org/I4210123185"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129728724"],"corresponding_institution_ids":["https://openalex.org/I4210123185"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07068452,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"19","first_page":"16397","last_page":"16405"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6471999883651733,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6471999883651733,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11980","display_name":"Human Mobility and Location-Based Analysis","score":0.06939999759197235,"subfield":{"id":"https://openalex.org/subfields/3313","display_name":"Transportation"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.05290000140666962,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6330999732017517},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6304000020027161},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6031000018119812},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5461000204086304},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5368000268936157},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5164999961853027},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.3984000086784363},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3921000063419342}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7498000264167786},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6330999732017517},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6304000020027161},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6031000018119812},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.590499997138977},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5461000204086304},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5368000268936157},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5164999961853027},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40139999985694885},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.3984000086784363},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3921000063419342},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.38600000739097595},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.36739999055862427},{"id":"https://openalex.org/C49545453","wikidata":"https://www.wikidata.org/wiki/Q69883","display_name":"Urban planning","level":2,"score":0.35409998893737793},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.323199987411499},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.251800000667572},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i19.38678","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i19.38678","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38678/42640","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i19.38678","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i19.38678","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38678/42640","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.821094274520874}],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3267229679","display_name":null,"funder_award_id":"LR21F020003","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4211196075","display_name":null,"funder_award_id":"62073295","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4794680191","display_name":null,"funder_award_id":"62072409","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5834871303","display_name":null,"funder_award_id":"62476247","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6414272459","display_name":null,"funder_award_id":"62073295","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"},{"id":"https://openalex.org/G8286626095","display_name":null,"funder_award_id":"2024C01214","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8779220306","display_name":null,"funder_award_id":"62072409","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137847064.pdf","grobid_xml":"https://content.openalex.org/works/W7137847064.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Region":[0],"representation":[1,61,105],"learning":[2,106,184],"plays":[3],"a":[4,32,95,146,154,173],"pivotal":[5],"role":[6],"in":[7,70,86,129],"urban":[8,16,59,131],"computing":[9],"by":[10],"extracting":[11],"meaningful":[12],"features":[13,74],"from":[14,137],"unlabeled":[15],"data.":[17],"Analogous":[18],"to":[19,53,84,177],"how":[20],"perceived":[21],"facial":[22],"age":[23],"reflects":[24],"an":[25,116],"individual's":[26],"health,":[27],"the":[28,151,168,199],"visual":[29,73,127],"appearance":[30],"of":[31,202],"city":[33],"serves":[34],"as":[35],"its":[36],"\"portrait\",":[37],"encapsulating":[38],"latent":[39],"socio-economic":[40],"and":[41,78,110,140,161,194],"environmental":[42],"characteristics.":[43],"Recent":[44],"studies":[45],"have":[46],"explored":[47],"leveraging":[48],"Large":[49],"Language":[50],"Models":[51],"(LLMs)":[52],"incorporate":[54],"textual":[55],"knowledge":[56,81,136],"into":[57],"imagery-based":[58],"region":[60,104],"learning.":[62],"However,":[63],"two":[64],"major":[65],"challenges":[66],"remain:":[67],"i)":[68],"difficulty":[69],"aligning":[71],"fine-grained":[72,126],"with":[75,125],"long":[76,123],"captions,":[77],"ii)":[79],"suboptimal":[80],"incorporation":[82],"due":[83],"noise":[85],"LLM-generated":[87,138],"captions.":[88],"To":[89,133],"address":[90],"these":[91],"issues,":[92],"we":[93,114,144,171],"propose":[94,145],"novel":[96],"pre-training":[97],"framework":[98],"called":[99],"UrbanLN":[100],"that":[101,121],"improves":[102],"Urban":[103],"through":[107],"Long-text":[108],"awareness":[109],"Noise":[111],"suppression.":[112],"Specifically,":[113],"introduce":[115],"information-preserved":[117],"stretching":[118],"interpolation":[119],"strategy":[120],"aligns":[122],"captions":[124,139,163],"semantics":[128],"complex":[130],"scenes.":[132],"effectively":[134],"mine":[135],"filter":[141],"out":[142],"noise,":[143],"dual-level":[147],"optimization":[148],"strategy.":[149],"At":[150,167],"data":[152],"level,":[153,170],"multi-model":[155],"collaboration":[156],"pipeline":[157],"automatically":[158],"generates":[159],"diverse":[160],"reliable":[162],"without":[164],"human":[165],"intervention.":[166],"model":[169],"employ":[172],"momentum-based":[174],"self-distillation":[175],"mechanism":[176],"generate":[178],"stable":[179],"pseudo-targets,":[180],"facilitating":[181],"robust":[182],"cross-modal":[183],"under":[185],"noisy":[186],"conditions.":[187],"Extensive":[188],"experiments":[189],"across":[190],"four":[191],"real-world":[192],"cities":[193],"various":[195],"downstream":[196],"tasks":[197],"demonstrate":[198],"superior":[200],"performance":[201],"our":[203],"UrbanLN.":[204]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
