{"id":"https://openalex.org/W4392903030","doi":"https://doi.org/10.1109/icassp48485.2024.10448302","title":"Human Guided Cross-Modal Reasoning with Semantic Attention Learning for Visual Question Answering","display_name":"Human Guided Cross-Modal Reasoning with Semantic Attention Learning for Visual Question Answering","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903030","doi":"https://doi.org/10.1109/icassp48485.2024.10448302"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052227145","display_name":"Lei Liao","orcid":"https://orcid.org/0000-0002-4751-504X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Liao","raw_affiliation_strings":["Sun Yat-sen University,School of Computer Science and Engineering,Guangzhou,China","School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University,School of Computer Science and Engineering,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086987894","display_name":"Feng Mao","orcid":"https://orcid.org/0000-0002-7896-4809"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mao Feng","raw_affiliation_strings":["Sun Yat-sen University,School of Computer Science and Engineering,Guangzhou,China","School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University,School of Computer Science and Engineering,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037873810","display_name":"Meng Yang","orcid":"https://orcid.org/0000-0002-0795-3221"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Yang","raw_affiliation_strings":["Sun Yat-sen University,School of Computer Science and Engineering,Guangzhou,China","School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University,School of Computer Science and Engineering,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052227145"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":0.4875,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.60161123,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2775","last_page":"2779"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.7356290221214294},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7317337989807129},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7254955768585205},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6427984237670898},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5456023216247559},{"id":"https://openalex.org/keywords/deductive-reasoning","display_name":"Deductive reasoning","score":0.5281375050544739},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.5248914957046509},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.5081150531768799},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.48799556493759155},{"id":"https://openalex.org/keywords/commonsense-reasoning","display_name":"Commonsense reasoning","score":0.46564018726348877},{"id":"https://openalex.org/keywords/analytic-reasoning","display_name":"Analytic reasoning","score":0.4632972180843353},{"id":"https://openalex.org/keywords/knowledge-representation-and-reasoning","display_name":"Knowledge representation and reasoning","score":0.4628407955169678},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45687946677207947},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4273782968521118},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4258642792701721}],"concepts":[{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.7356290221214294},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7317337989807129},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7254955768585205},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6427984237670898},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5456023216247559},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.5281375050544739},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.5248914957046509},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.5081150531768799},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.48799556493759155},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.46564018726348877},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.4632972180843353},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.4628407955169678},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45687946677207947},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4273782968521118},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4258642792701721}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3569266497","display_name":null,"funder_award_id":"20220101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6484137807","display_name":null,"funder_award_id":"62176271","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W2745461083","https://openalex.org/W2747623286","https://openalex.org/W2896457183","https://openalex.org/W2963150162","https://openalex.org/W2963518342","https://openalex.org/W2964018924","https://openalex.org/W2964067226","https://openalex.org/W2998356391","https://openalex.org/W2998631105","https://openalex.org/W3094502228","https://openalex.org/W3118500473","https://openalex.org/W3163821660","https://openalex.org/W3174366544","https://openalex.org/W3202958533","https://openalex.org/W4212774754","https://openalex.org/W4226182655","https://openalex.org/W4226397046","https://openalex.org/W4230419477","https://openalex.org/W4291900964","https://openalex.org/W4293524578","https://openalex.org/W4308235961","https://openalex.org/W6748270630","https://openalex.org/W6796481600","https://openalex.org/W6798805250","https://openalex.org/W6802115918","https://openalex.org/W6811013733"],"related_works":["https://openalex.org/W1816891706","https://openalex.org/W2347305376","https://openalex.org/W3081308103","https://openalex.org/W2113245685","https://openalex.org/W2186704254","https://openalex.org/W4232020376","https://openalex.org/W2349790191","https://openalex.org/W1974525301","https://openalex.org/W2261193970","https://openalex.org/W94330105"],"abstract_inverted_index":{"One":[0],"of":[1,12,19,54,63,99,120],"the":[2,6,16,23,51,55,61,64,69,92,102,111,124,132,140,144,154,159,170],"major":[3],"difficulties":[4],"in":[5],"Visual":[7],"Question":[8],"Answering":[9],"(VQA)":[10],"task":[11,161],"real-world":[13,171],"images":[14],"is":[15,43,72],"long-tailed":[17],"distribution":[18,127],"concepts":[20],"which":[21],"makes":[22],"model":[24,65],"vulnerable":[25],"to":[26,59,75,90,109,138],"negative":[27],"linguistic":[28],"biases.":[29],"To":[30],"imitate":[31],"human":[32,151],"learning":[33,89,108],"and":[34,48,67,114,147,150,178],"reasoning,":[35],"researchers":[36],"have":[37],"designed":[38],"reasoning":[39,66,93,97,103,134,141,152],"models,":[40],"which,":[41],"however,":[42],"still":[44],"a":[45,73,80],"black-box":[46],"process":[47],"cannot":[49],"guarantee":[50],"visual":[52],"interpretability":[53],"final":[56],"answer.":[57],"How":[58],"guide":[60,139],"direction":[62],"improve":[68,91],"generalization":[70],"ability":[71],"challenge":[74],"be":[76],"solved.":[77],"We":[78],"proposed":[79,155],"novel":[81],"Human-Guided":[82],"Cross-Modal":[83],"Reasoning":[84],"(HGCMR)":[85],"with":[86,162],"semantic":[87,106],"attention":[88,107,126],"ability.":[94],"The":[95,117],"cross-modal":[96],"module":[98,119],"HGCMR":[100,121,156,175],"imitates":[101],"steps":[104],"via":[105],"generate":[110],"contextural":[112],"image":[113,146],"question":[115,148],"representation.":[116],"supervision":[118],"automatically":[122],"extracts":[123],"human-guided":[125],"over":[128],"object":[129],"regions":[130],"from":[131],"provided":[133],"patterns,":[135],"so":[136],"as":[137],"process.":[142],"With":[143],"attended":[145],"representation":[149],"supervision,":[153],"finally":[157],"complete":[158],"question-answering":[160],"an":[163],"output":[164],"classifier.":[165],"By":[166],"evaluating":[167],"models":[168],"on":[169],"dataset":[172],"GQA,":[173],"our":[174],"improves":[176],"compositional":[177],"grounding":[179],"performance.":[180]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
