{"id":"https://openalex.org/W4414360016","doi":"https://doi.org/10.24963/ijcai.2025/246","title":"Multimodal Image Matching Based on Cross-Modality Completion Pre-training","display_name":"Multimodal Image Matching Based on Cross-Modality Completion Pre-training","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360016","doi":"https://doi.org/10.24963/ijcai.2025/246"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/246","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/246","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006346707","display_name":"Meng Yang","orcid":"https://orcid.org/0000-0003-3036-0667"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Meng Yang","raw_affiliation_strings":["Wuhan University"],"affiliations":[{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100366838","display_name":"Fan Fan","orcid":"https://orcid.org/0000-0002-6212-1516"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Fan","raw_affiliation_strings":["Wuhan University"],"affiliations":[{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024412360","display_name":"Jun Huang","orcid":"https://orcid.org/0000-0001-9190-9233"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Huang","raw_affiliation_strings":["Wuhan University"],"affiliations":[{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002618865","display_name":"Yong Ma","orcid":"https://orcid.org/0000-0002-1116-0662"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Ma","raw_affiliation_strings":["Wuhan University"],"affiliations":[{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021381864","display_name":"Xiaoguang Mei","orcid":"https://orcid.org/0000-0002-0239-8580"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoguang Mei","raw_affiliation_strings":["Wuhan University"],"affiliations":[{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011596136","display_name":"Zhanchuan Cai","orcid":"https://orcid.org/0000-0002-6954-7691"},"institutions":[{"id":"https://openalex.org/I111950717","display_name":"Macau University of Science and Technology","ror":"https://ror.org/03jqs2n27","country_code":"MO","type":"education","lineage":["https://openalex.org/I111950717","https://openalex.org/I4391767947"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Zhanchuan Cai","raw_affiliation_strings":["Macau University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Macau University of Science and Technology","institution_ids":["https://openalex.org/I111950717"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040010053","display_name":"Jiayi Ma","orcid":"https://orcid.org/0000-0003-3264-3265"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayi Ma","raw_affiliation_strings":["Wuhan University"],"affiliations":[{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5006346707"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26727734,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2206","last_page":"2214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9532999992370605,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9532999992370605,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9491000175476074,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6542999744415283},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5507000088691711},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5418000221252441},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.49380001425743103},{"id":"https://openalex.org/keywords/image-matching","display_name":"Image matching","score":0.42080000042915344},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4074000120162964},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.37389999628067017},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.35260000824928284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7494999766349792},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.746999979019165},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6542999744415283},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5507000088691711},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5418000221252441},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.49380001425743103},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4611000120639801},{"id":"https://openalex.org/C2986492983","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image matching","level":3,"score":0.42080000042915344},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.37389999628067017},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.35260000824928284},{"id":"https://openalex.org/C166704113","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image registration","level":3,"score":0.31700000166893005},{"id":"https://openalex.org/C158096908","wikidata":"https://www.wikidata.org/wiki/Q3983303","display_name":"Template matching","level":3,"score":0.3037000000476837},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.29679998755455017},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.29030001163482666},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2619999945163727},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/246","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/246","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,74],"differences":[1,11,138],"in":[2,93],"imaging":[3],"devices":[4],"cause":[5],"multimodal":[6,25,34,60,100],"images":[7,26],"to":[8,28,69,90,115,128,136],"have":[9],"modal":[10,137],"and":[12,84,119],"geometric":[13,117],"distortions,":[14],"complicating":[15],"the":[16,29,105,126],"matching":[17,21,92,102,113],"task.":[18],"Deep":[19],"learning-based":[20],"methods":[22],"struggle":[23],"with":[24],"due":[27],"lack":[30],"of":[31],"large":[32,141],"annotated":[33,142],"datasets.":[35,143,153],"To":[36],"address":[37],"these":[38],"challenges,":[39],"we":[40],"propose":[41],"XCP-Match":[42,48,109,147],"based":[43,57,103],"on":[44,58,104,151],"cross-modality":[45,54,82],"completion":[46,55],"pre-training.":[47],"has":[49],"two":[50],"phases.":[51],"(1)":[52],"Self-supervised":[53],"pre-training":[56,67,75],"real":[59],"image":[61,78,101],"dataset.":[62,108],"We":[63],"develop":[64],"a":[65,111],"novel":[66],"model":[68,127],"learn":[70,129],"cross-modal":[71,131],"semantic":[72,132],"features.":[73],"uses":[76],"masked":[77],"modeling":[79],"method":[80],"for":[81,99],"completion,":[83],"introduces":[85],"an":[86],"attention-weighted":[87],"contrastive":[88],"loss":[89],"emphasize":[91],"overlapping":[94],"areas.":[95],"(2)":[96],"Supervised":[97],"fine-tuning":[98],"augmented":[106],"MegaDepth":[107],"constructs":[110],"complete":[112],"framework":[114],"overcome":[116],"distortions":[118],"achieve":[120],"precise":[121],"matching.":[122],"Two-phase":[123],"training":[124],"encourages":[125],"deep":[130],"information,":[133],"improving":[134],"adaptation":[135],"without":[139],"needing":[140],"Experiments":[144],"demonstrate":[145],"that":[146],"outperforms":[148],"existing":[149],"algorithms":[150],"public":[152]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
