{"id":"https://openalex.org/W4200513045","doi":"https://doi.org/10.1145/3490725.3490736","title":"A Lightweight Visual Question Answering Model based on Semantic Similarity","display_name":"A Lightweight Visual Question Answering Model based on Semantic Similarity","publication_year":2021,"publication_date":"2021-09-17","ids":{"openalex":"https://openalex.org/W4200513045","doi":"https://doi.org/10.1145/3490725.3490736"},"language":"en","primary_location":{"id":"doi:10.1145/3490725.3490736","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3490725.3490736","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 The 4th International Conference on Machine Learning and Machine Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102617605","display_name":"Zhiming He","orcid":null},"institutions":[{"id":"https://openalex.org/I4510145","display_name":"Jiangxi University of Science and Technology","ror":"https://ror.org/03q0t9252","country_code":"CN","type":"education","lineage":["https://openalex.org/I4510145"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhiming He","raw_affiliation_strings":["School of Information Engineering, Jiangxi University of Science and Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Jiangxi University of Science and Technology, China","institution_ids":["https://openalex.org/I4510145"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062118740","display_name":"Jingping Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I4510145","display_name":"Jiangxi University of Science and Technology","ror":"https://ror.org/03q0t9252","country_code":"CN","type":"education","lineage":["https://openalex.org/I4510145"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingping Zeng","raw_affiliation_strings":["School of Information Engineering, Jiangxi University of Science and Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Jiangxi University of Science and Technology, China","institution_ids":["https://openalex.org/I4510145"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102617605"],"corresponding_institution_ids":["https://openalex.org/I4510145"],"apc_list":null,"apc_paid":null,"fwci":0.0961,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.42566993,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"71","last_page":"76"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7944457530975342},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6430650949478149},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6319481730461121},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.6209613084793091},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.6102162003517151},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5334599018096924},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4465109705924988},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4394034147262573},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.41583308577537537},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.41010552644729614},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3734356760978699},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3674340844154358},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3420335054397583}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7944457530975342},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6430650949478149},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6319481730461121},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.6209613084793091},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.6102162003517151},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5334599018096924},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4465109705924988},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4394034147262573},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.41583308577537537},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.41010552644729614},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3734356760978699},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3674340844154358},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3420335054397583}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3490725.3490736","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3490725.3490736","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 The 4th International Conference on Machine Learning and Machine Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1933349210","https://openalex.org/W2064675550","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2560730294","https://openalex.org/W2618530766","https://openalex.org/W2745461083","https://openalex.org/W2963383024","https://openalex.org/W2963398599","https://openalex.org/W2963954913","https://openalex.org/W2964118342","https://openalex.org/W2964138343","https://openalex.org/W3004349648","https://openalex.org/W3006750887","https://openalex.org/W4245551996"],"related_works":["https://openalex.org/W2389818373","https://openalex.org/W3192083251","https://openalex.org/W2220831889","https://openalex.org/W4312683641","https://openalex.org/W3027421045","https://openalex.org/W2114797768","https://openalex.org/W2360571912","https://openalex.org/W2380654781","https://openalex.org/W2576320324","https://openalex.org/W2980386803"],"abstract_inverted_index":{"The":[0,18,149,160],"key":[1],"of":[2,12,34,51,126,178],"visual":[3,62],"question":[4,16,38,63,75],"answering":[5,64],"is":[6,90,102,130],"to":[7,25,79,111,144],"learn":[8,170],"the":[9,22,31,71,80,85,97,113,123,133,141,146,153,165,187],"semantic":[10,68,87,100,114,172],"alignment":[11,173],"image":[13,35,72,127],"objects":[14,36],"and":[15,37,74,84,128,143,180],"words.":[17],"typical":[19],"methods":[20,189],"use":[21],"attention":[23,32,42,120],"mechanism":[24],"achieve":[26,181],"this":[27,55,57],"goal.":[28],"However,":[29],"calculating":[30],"weight":[33],"keywords":[39],"requires":[40],"an":[41],"function,":[43],"a":[44,48,60,175],"function":[45],"usually":[46],"required":[47],"large":[49],"number":[50,177],"parameters.":[52,159],"Focusing":[53],"on":[54,67,190],"issue,":[56],"paper":[58],"proposes":[59],"lightweight":[61],"model":[65,167],"based":[66],"similarity.":[69,95],"Firstly,":[70],"features":[73,76],"are":[77],"mapped":[78],"common":[81],"visual-semantic":[82],"space,":[83],"multi-modal":[86],"similarity":[88,115],"matrix":[89,116],"constructed":[91],"by":[92,105,152],"using":[93,106],"cosine":[94],"Then,":[96],"multi-level":[98],"potential":[99],"space":[101],"further":[103],"explored":[104],"multi-channel":[107],"convolution":[108],"neural":[109],"network":[110],"map":[112],"into":[117,140],"two":[118],"different":[119],"distributions.":[121],"Finally,":[122],"joint":[124],"representation":[125],"text":[129],"learned":[131],"through":[132],"multimodal":[134,171],"fusion,":[135],"which":[136],"will":[137],"be":[138],"fed":[139],"classifier":[142],"predict":[145],"correct":[147],"answer.":[148],"co-attention":[150],"achieved":[151],"proposed":[154,166],"method":[155],"with":[156,174],"very":[157],"few":[158],"experiment":[161],"results":[162],"show":[163],"that":[164],"can":[168],"effectively":[169],"small":[176],"parameters":[179],"competitive":[182],"or":[183],"better":[184],"performance":[185],"than":[186],"state-of-the-art":[188],"VQA":[191],"v2.0":[192],"dataset.":[193]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
