{"id":"https://openalex.org/W4309342494","doi":"https://doi.org/10.1109/smc53654.2022.9945109","title":"BSAM: Research on image-text matching method based on Bert and self-attention mechanism","display_name":"BSAM: Research on image-text matching method based on Bert and self-attention mechanism","publication_year":2022,"publication_date":"2022-10-09","ids":{"openalex":"https://openalex.org/W4309342494","doi":"https://doi.org/10.1109/smc53654.2022.9945109"},"language":"en","primary_location":{"id":"doi:10.1109/smc53654.2022.9945109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc53654.2022.9945109","pdf_url":null,"source":{"id":"https://openalex.org/S4363607746","display_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036453482","display_name":"Jishu Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jishu Wei","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073081338","display_name":"Tao Sun","orcid":"https://orcid.org/0000-0003-2220-930X"},"institutions":[{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Sun","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084797285","display_name":"Zhibang Quan","orcid":"https://orcid.org/0000-0002-5429-2630"},"institutions":[{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhibang Quan","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005622399","display_name":"Mengli Su","orcid":null},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengli Su","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100404244","display_name":"Zihao Zhang","orcid":"https://orcid.org/0000-0003-0808-220X"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihao Zhang","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002112209","display_name":"Shenjie Zhong","orcid":null},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shenjie Zhong","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Computer Science and Technology,Jinan,China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]},{"raw_affiliation_string":"Computer Science and Technology, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5036453482"],"corresponding_institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11428775,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"30","issue":null,"first_page":"1688","last_page":"1693"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7709736824035645},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6559134721755981},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6555619239807129},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6161807775497437},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5859789848327637},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.5852724313735962},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.551077127456665},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5350834727287292},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5310835242271423},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5257902145385742},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.48150333762168884},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38984841108322144},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3522418439388275},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1229584813117981}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7709736824035645},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6559134721755981},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6555619239807129},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6161807775497437},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5859789848327637},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.5852724313735962},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.551077127456665},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5350834727287292},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5310835242271423},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5257902145385742},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.48150333762168884},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38984841108322144},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3522418439388275},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1229584813117981},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc53654.2022.9945109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc53654.2022.9945109","pdf_url":null,"source":{"id":"https://openalex.org/S4363607746","display_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.47999998927116394,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1484210532","https://openalex.org/W1514535095","https://openalex.org/W1905882502","https://openalex.org/W2546696630","https://openalex.org/W2552579943","https://openalex.org/W2563587296","https://openalex.org/W2606473278","https://openalex.org/W2745461083","https://openalex.org/W2778940641","https://openalex.org/W2884585870","https://openalex.org/W2894786240","https://openalex.org/W2962964995","https://openalex.org/W2964022527","https://openalex.org/W2964120214","https://openalex.org/W2981586349","https://openalex.org/W4298392976","https://openalex.org/W4385245566","https://openalex.org/W6620707391","https://openalex.org/W6628927728","https://openalex.org/W6630875275","https://openalex.org/W6682086655","https://openalex.org/W6739901393","https://openalex.org/W6753412334"],"related_works":["https://openalex.org/W2329500892","https://openalex.org/W28991112","https://openalex.org/W2370726991","https://openalex.org/W2369710579","https://openalex.org/W4327728159","https://openalex.org/W4394266730","https://openalex.org/W1990856605","https://openalex.org/W2053783616","https://openalex.org/W2545348020","https://openalex.org/W2910600015"],"abstract_inverted_index":{"Image-text":[0],"matching":[1,196,203],"plays":[2],"a":[3,88],"crucial":[4],"role":[5],"in":[6,16],"connecting":[7],"vision":[8],"and":[9,22,28,63,69,93,119,123,135,142,158,166,181,193,201,214],"language.":[10],"The":[11,161],"details":[12],"of":[13,46,56,66,72,77,110,146],"the":[14,17,19,23,26,29,33,43,47,54,57,60,67,70,73,78,99,103,108,111,126,143,147,153,174,179,219],"objects":[15,45],"image,":[18,48],"positional":[20],"relationship,":[21],"correspondence":[24],"between":[25,155],"background":[27,64,124,159],"text":[30,162],"description":[31],"are":[32,129],"keys":[34],"to":[35,53,106,116,187],"image-text":[36],"matching.":[37],"Previous":[38],"studies":[39],"either":[40],"only":[41,50],"extract":[42],"salient":[44],"or":[49],"pay":[51,114],"attention":[52,115],"location":[55],"object,":[58,68],"ignoring":[59],"detailed":[61,121],"features":[62,65,122,134,137,165,168],"extraction":[71],"overall":[74],"semantic":[75],"information":[76,145],"image":[79,100,127,148,191],"is":[80,149],"not":[81],"comprehensive":[82],"enough.":[83],"Accordingly,":[84],"this":[85],"paper":[86],"proposes":[87],"model":[89,221],"based":[90,151],"on":[91,152,210],"Bert":[92,175],"Self-Attention":[94],"Mechanism":[95],"(BSAM),":[96],"we":[97],"segment":[98],"area,":[101,113],"use":[102],"self-attention":[104],"mechanism":[105],"enhance":[107,195],"weight":[109],"key":[112],"each":[117,156],"object":[118],"their":[120],"features,":[125],"regions":[128,192],"mapped":[130],"into":[131],"original":[132],"region":[133,140,157],"new":[136,167],"with":[138,169,198,205],"other":[139,170],"relationships,":[141],"global":[144],"inferred":[150],"relationship":[154],"features.":[160],"extracts":[163],"word":[164,171],"relationships":[172],"through":[173],"model.":[176],"We":[177],"propose":[178],"Cross-Attention":[180],"Similarity":[182],"-Attention":[183],"Filtering":[184],"(CA-SAF)":[185],"module":[186],"align":[188],"all":[189],"relevant":[190],"words,":[194],"pairs":[197,204],"high":[199],"weights,":[200],"filter":[202],"lower":[206],"weights.":[207],"Extensive":[208],"experiments":[209],"two":[211],"datasets,":[212],"Flickr30K":[213],"MS":[215],"COCO,":[216],"show":[217],"that":[218],"BSAM":[220],"significantly":[222],"outperforms":[223],"state-of-the-art":[224],"methods.":[225]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
