{"id":"https://openalex.org/W4410636376","doi":"https://doi.org/10.1145/3701716.3717564","title":"The DenseCap-Guided Attention Network For Image-Text Matching","display_name":"The DenseCap-Guided Attention Network For Image-Text Matching","publication_year":2025,"publication_date":"2025-05-08","ids":{"openalex":"https://openalex.org/W4410636376","doi":"https://doi.org/10.1145/3701716.3717564"},"language":"en","primary_location":{"id":"doi:10.1145/3701716.3717564","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3717564","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3717564","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3717564","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026473068","display_name":"Xuri Ge","orcid":"https://orcid.org/0000-0002-3925-4951"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuri Ge","raw_affiliation_strings":["Shandong University, jinan, China"],"raw_orcid":"https://orcid.org/0000-0002-3925-4951","affiliations":[{"raw_affiliation_string":"Shandong University, jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043906886","display_name":"Linqing Li","orcid":null},"institutions":[{"id":"https://openalex.org/I40963666","display_name":"Central China Normal University","ror":"https://ror.org/03x1jna21","country_code":"CN","type":"education","lineage":["https://openalex.org/I40963666"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linqing Li","raw_affiliation_strings":["Central China Normal University, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0009-2184-9971","affiliations":[{"raw_affiliation_string":"Central China Normal University, Wuhan, China","institution_ids":["https://openalex.org/I40963666"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029582812","display_name":"Songpei Xu","orcid":"https://orcid.org/0009-0001-5735-8674"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Songpei Xu","raw_affiliation_strings":["University of Glasgow, Glasgow, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0001-5735-8674","affiliations":[{"raw_affiliation_string":"University of Glasgow, Glasgow, United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kaiwen Zheng","orcid":"https://orcid.org/0009-0007-2516-8407"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kaiwen Zheng","raw_affiliation_strings":["University of Glasgow, Glasgow, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0007-2516-8407","affiliations":[{"raw_affiliation_string":"University of Glasgow, Glasgow, United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yaoqin He","orcid":"https://orcid.org/0009-0004-7131-4581"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaoqin He","raw_affiliation_strings":["Tomorrow Advancing Life, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-7131-4581","affiliations":[{"raw_affiliation_string":"Tomorrow Advancing Life, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012498345","display_name":"Junchen Fu","orcid":"https://orcid.org/0000-0003-4759-2042"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Junchen Fu","raw_affiliation_strings":["University of Glasgow, Glasgow, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-4759-2042","affiliations":[{"raw_affiliation_string":"University of Glasgow, Glasgow, United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069702331","display_name":"Joemon M. Jose","orcid":"https://orcid.org/0000-0001-9228-1759"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Joemon M. Jose","raw_affiliation_strings":["University of Glasgow, Glasgow, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0001-9228-1759","affiliations":[{"raw_affiliation_string":"University of Glasgow, Glasgow, United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5026473068"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08896081,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2153","last_page":"2160"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7108539342880249},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5952051281929016},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5554507374763489},{"id":"https://openalex.org/keywords/image-matching","display_name":"Image matching","score":0.5041521787643433},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.49536073207855225},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.473625123500824},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37002840638160706},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33351701498031616},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32208603620529175},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11907151341438293},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.06645014882087708}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7108539342880249},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5952051281929016},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5554507374763489},{"id":"https://openalex.org/C2986492983","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image matching","level":3,"score":0.5041521787643433},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.49536073207855225},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.473625123500824},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37002840638160706},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33351701498031616},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32208603620529175},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11907151341438293},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.06645014882087708}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3701716.3717564","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3717564","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3717564","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.gla.ac.uk:352098","is_oa":true,"landing_page_url":"https://eprints.gla.ac.uk/352098/1/352098.pdf","pdf_url":"https://eprints.gla.ac.uk/352098/2/352098.pdf","source":{"id":"https://openalex.org/S4210235606","display_name":"ENLIGHTEN (Jurnal Bimbingan dan Konseling Islam)","issn_l":"2622-8912","issn":["2622-8912","2622-8920"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":{"id":"doi:10.1145/3701716.3717564","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3717564","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3717564","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410636376.pdf","grobid_xml":"https://content.openalex.org/works/W4410636376.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1933349210","https://openalex.org/W2064675550","https://openalex.org/W2131774270","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2546696630","https://openalex.org/W2606473278","https://openalex.org/W2745461083","https://openalex.org/W2778100917","https://openalex.org/W2778940641","https://openalex.org/W2962964995","https://openalex.org/W2963389687","https://openalex.org/W2963758027","https://openalex.org/W2964120214","https://openalex.org/W2966248453","https://openalex.org/W2982078236","https://openalex.org/W2990571534","https://openalex.org/W3035552787","https://openalex.org/W3035605030","https://openalex.org/W3189718596","https://openalex.org/W3206477365","https://openalex.org/W4392977092"],"related_works":["https://openalex.org/W2384918310","https://openalex.org/W2383808867","https://openalex.org/W2372581239","https://openalex.org/W2107893065","https://openalex.org/W2617958085","https://openalex.org/W1509862229","https://openalex.org/W1974208548","https://openalex.org/W2050706403","https://openalex.org/W1973922169","https://openalex.org/W1519745258"],"abstract_inverted_index":{"Image-text":[0],"matching":[1,20,155],"is":[2,41,139,157],"a":[3,83,91,132,135,153],"typical":[4],"cross-modal":[5],"task,":[6],"which":[7,62],"has":[8],"recently":[9],"attracted":[10],"great":[11],"interest":[12],"in":[13,70],"multimedia":[14],"and":[15,34,53,60,80,107,117,145,150,163,167,182,196],"computer":[16],"vision.":[17],"Previous":[18],"image-text":[19,44,73],"methods":[21],"mostly":[22],"rely":[23],"on":[24,190],"coarse":[25,47],"appearance":[26],"features":[27],"to":[28,76,114,141,159],"guide":[29],"the":[30,67,72,99,103,121,128,143,161,175,180],"learning":[31],"of":[32,105,148],"image":[33,130,149],"text":[35],"monotonous":[36],"representations":[37,50,111],"whereby":[38],"attention-aware":[39,169],"mechanism":[40],"introduced":[42],"for":[43,57],"matching.":[45],"Such":[46],"feature":[48],"guided":[49],"lack":[51],"fine-grained":[52,84,106,144],"diverse":[54,108],"semantic":[55],"information":[56],"linking":[58],"images":[59,79,116],"sentences":[61],"are":[63,124],"widely":[64],"regarded":[65],"as":[66,98,112],"important":[68],"cues":[69],"aligning":[71],"pairs,":[74],"leading":[75],"mismatches":[77],"between":[78,165],"texts":[81],"at":[82],"level.":[85],"In":[86,119],"this":[87],"paper,":[88],"we":[89],"propose":[90],"novel":[92],"Densecap-guided":[93],"Attention":[94],"Network,":[95],"termed":[96],"DAN,":[97],"bridge":[100],"that":[101,174],"allows":[102],"integration":[104],"dense":[109,122],"caption":[110],"mediation":[113],"link":[115],"texts.":[118],"particular,":[120],"captions":[123],"first":[125],"extracted":[126],"from":[127],"given":[129],"by":[131],"densecap-parser.":[133],"Then,":[134],"densecap-guided":[136],"attention":[137],"module":[138],"designed":[140],"mine":[142],"discriminate":[146],"correspondence":[147],"sentence.":[151],"Finally,":[152],"graph-structured":[154],"network":[156],"utilized":[158],"learn":[160],"associations":[162],"alignments":[164],"visual":[166],"textual":[168],"features.":[170],"Quantitative":[171],"results":[172],"show":[173],"proposed":[176],"DAN":[177],"can":[178],"outperform":[179],"state-of-the-art":[181],"alternative":[183],"approaches":[184],"under":[185],"various":[186],"standard":[187],"evaluation":[188],"metrics":[189],"two":[191],"public":[192],"benchmarks,":[193],"Microsoft":[194],"COCO":[195],"Flickr30K.":[197]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
