{"id":"https://openalex.org/W4308987613","doi":"https://doi.org/10.1145/3557918.3565874","title":"Remote sensing visual question answering with a self-attention multi-modal encoder","display_name":"Remote sensing visual question answering with a self-attention multi-modal encoder","publication_year":2022,"publication_date":"2022-11-01","ids":{"openalex":"https://openalex.org/W4308987613","doi":"https://doi.org/10.1145/3557918.3565874"},"language":"en","primary_location":{"id":"doi:10.1145/3557918.3565874","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3557918.3565874","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3557918.3565874","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th ACM SIGSPATIAL International Workshop on AI for Geographic Knowledge Discovery","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3557918.3565874","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014468863","display_name":"Jo\u00e3o Daniel Silva","orcid":"https://orcid.org/0000-0001-6474-7822"},"institutions":[{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Jo\u00e3o Daniel Silva","raw_affiliation_strings":["University of Lisbon, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"University of Lisbon, Lisbon, Portugal","institution_ids":["https://openalex.org/I141596103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002705720","display_name":"Jo\u00e3o Magalh\u00e3es","orcid":"https://orcid.org/0000-0001-6290-5719"},"institutions":[{"id":"https://openalex.org/I83558840","display_name":"Universidade Nova de Lisboa","ror":"https://ror.org/02xankh89","country_code":"PT","type":"education","lineage":["https://openalex.org/I83558840"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jo\u00e3o Magalh\u00e3es","raw_affiliation_strings":["NOVA University, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"NOVA University, Lisbon, Portugal","institution_ids":["https://openalex.org/I83558840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005192117","display_name":"Devis Tuia","orcid":"https://orcid.org/0000-0003-0374-2459"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Devis Tuia","raw_affiliation_strings":["Swiss Federal Institute of Technology Lausanne, Sion, Switzerland"],"affiliations":[{"raw_affiliation_string":"Swiss Federal Institute of Technology Lausanne, Sion, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055101594","display_name":"Bruno Martins","orcid":"https://orcid.org/0000-0002-3856-2936"},"institutions":[{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Bruno Martins","raw_affiliation_strings":["University of Lisbon, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"University of Lisbon, Lisbon, Portugal","institution_ids":["https://openalex.org/I141596103"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5014468863"],"corresponding_institution_ids":["https://openalex.org/I141596103"],"apc_list":null,"apc_paid":null,"fwci":0.8166,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.7352565,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"40","last_page":"49"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8403770923614502},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7471022009849548},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6126824617385864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5502246022224426},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.537281334400177},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5180380940437317},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4877803921699524},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36931419372558594},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36040443181991577},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3576626181602478},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.1049061119556427}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8403770923614502},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7471022009849548},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6126824617385864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5502246022224426},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.537281334400177},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5180380940437317},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4877803921699524},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36931419372558594},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36040443181991577},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3576626181602478},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.1049061119556427},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3557918.3565874","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3557918.3565874","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3557918.3565874","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th ACM SIGSPATIAL International Workshop on AI for Geographic Knowledge Discovery","raw_type":"proceedings-article"},{"id":"pmh:oai:infoscience.epfl.ch:300088","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/194693","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"conference proceedings"}],"best_oa_location":{"id":"doi:10.1145/3557918.3565874","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3557918.3565874","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3557918.3565874","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th ACM SIGSPATIAL International Workshop on AI for Geographic Knowledge Discovery","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[{"id":"https://openalex.org/G1299994043","display_name":null,"funder_award_id":"PIDDAC","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G2441341484","display_name":null,"funder_award_id":"UIDB/50021/2020","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G2582522631","display_name":null,"funder_award_id":"Funda\u00e7\u00e3o para a Ci\u00eancia e Tecnologia","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G3272457656","display_name":null,"funder_award_id":"UIDB/","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G3445675980","display_name":null,"funder_award_id":"PTDC/CCI-CIF/32607/2017 (MIMU)","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G5008780491","display_name":null,"funder_award_id":"PTDC/CCI-CIF/32607/2017","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G5454278309","display_name":null,"funder_award_id":"/2017","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G6299983296","display_name":null,"funder_award_id":"PTDC/","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4308987613.pdf","grobid_xml":"https://content.openalex.org/works/W4308987613.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W2095705004","https://openalex.org/W2962933067","https://openalex.org/W2963163009","https://openalex.org/W2963705779","https://openalex.org/W2964067226","https://openalex.org/W2983550634","https://openalex.org/W3012111773","https://openalex.org/W3047579407","https://openalex.org/W3154791864","https://openalex.org/W3164670515","https://openalex.org/W3168972675","https://openalex.org/W3175815213","https://openalex.org/W3206799531","https://openalex.org/W3214363026","https://openalex.org/W4200631575","https://openalex.org/W4213157969","https://openalex.org/W4292828962"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W4288102755","https://openalex.org/W4381058564","https://openalex.org/W2964413124"],"abstract_inverted_index":{"Visual":[0],"Question":[1],"Answering":[2],"(VQA)":[3],"on":[4,47,53,69],"remote":[5,78,142],"sensing":[6,79,143],"imagery":[7],"can":[8,71,127,155],"help":[9],"non-expert":[10],"users":[11],"in":[12,37,166],"extracting":[13],"information":[14],"from":[15,110],"Earth":[16],"observation":[17],"data.":[18,120],"Current":[19],"approaches":[20],"follow":[21],"a":[22,66,90,157],"neural":[23],"encoder-decoder":[24],"design,":[25],"combining":[26],"convolutional":[27],"and":[28,100,145],"recurrent":[29],"encoders":[30,51],"together":[31],"with":[32,85,116],"cross-modal":[33],"fusion":[34],"components.":[35],"However,":[36],"other":[38],"VQA":[39,99],"application":[40],"domains,":[41],"the":[42,54,62,105,111,124,141,152],"current":[43],"state-of-the-art":[44],"methods":[45,76],"rely":[46],"self-attention,":[48],"employing":[49],"multi-modal":[50],"based":[52,68],"Transformer":[55],"architecture.":[56],"In":[57],"this":[58],"work,":[59],"we":[60,154],"assess":[61],"degree":[63],"to":[64,140],"which":[65,101],"model":[67,92,114],"self-attention":[70],"bring":[72],"improvements":[73],"over":[74,130,160],"previous":[75,91,131,167],"for":[77,97],"VQA.":[80],"We":[81],"specifically":[82],"present":[83],"results":[84,129],"an":[86],"extended":[87],"version":[88],"of":[89,107,119,151],"named":[93],"MM-BERT,":[94],"originally":[95],"proposed":[96,125],"medical":[98],"does":[102],"not":[103],"require":[104],"extraction":[106],"region":[108],"features":[109],"images,":[112,153],"or":[113,137],"pre-training":[115,136],"extensive":[117],"amounts":[118],"Experiments":[121],"show":[122],"that":[123],"method":[126],"improve":[128],"approaches.":[132],"Even":[133],"without":[134],"in-domain":[135],"specific":[138],"adaptations":[139],"domain,":[144],"using":[146],"as":[147],"input":[148],"low-resolution":[149],"versions":[150],"achieve":[156],"high":[158],"accuracy":[159],"three":[161],"different":[162],"datasets":[163],"extensively":[164],"used":[165],"studies.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
