{"id":"https://openalex.org/W4385767402","doi":"https://doi.org/10.24963/ijcai.2023/93","title":"Diagram Visual Grounding: Learning to See with Gestalt-Perceptual Attention","display_name":"Diagram Visual Grounding: Learning to See with Gestalt-Perceptual Attention","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385767402","doi":"https://doi.org/10.24963/ijcai.2023/93"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2023/93","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/93","pdf_url":"https://www.ijcai.org/proceedings/2023/0093.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2023/0093.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068650842","display_name":"Xin Hu","orcid":"https://orcid.org/0000-0001-7574-3931"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Hu","raw_affiliation_strings":["Xi\u2019an Jiaotong University","Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Xi\u2019an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041143430","display_name":"Lingling Zhang","orcid":"https://orcid.org/0000-0001-5070-8523"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lingling Zhang","raw_affiliation_strings":["Xi'an Jiaotong University","Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100361857","display_name":"Jun Liu","orcid":"https://orcid.org/0000-0002-4365-4165"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Liu","raw_affiliation_strings":["Xi'an Jiaotong Univerisity","National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China","Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong Univerisity","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100390695","display_name":"Xinyu Zhang","orcid":"https://orcid.org/0000-0002-4335-682X"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Zhang","raw_affiliation_strings":["Xi'an Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101798145","display_name":"Wenjun Wu","orcid":"https://orcid.org/0000-0002-1627-5880"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjun Wu","raw_affiliation_strings":["Xi'an Jiaotong University","National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China","Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115694806","display_name":"Qianying Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]},{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qianying Wang","raw_affiliation_strings":["Lenovo Research","Lenovo Research, Beijing, China","Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Lenovo Research","institution_ids":["https://openalex.org/I4210156165"]},{"raw_affiliation_string":"Lenovo Research, Beijing, China","institution_ids":["https://openalex.org/I4210156165"]},{"raw_affiliation_string":"Shaanxi Provincial Key Laboratory of Big Data Knowledge Engineering, School of Computer Science and Technology, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5041143430"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.2377,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.51586049,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"837","last_page":"845"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gestalt-psychology","display_name":"Gestalt psychology","score":0.8202036619186401},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7423381805419922},{"id":"https://openalex.org/keywords/schematic","display_name":"Schematic","score":0.5096421241760254},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.506846010684967},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4384106695652008},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.42981332540512085},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.32920071482658386}],"concepts":[{"id":"https://openalex.org/C27362006","wikidata":"https://www.wikidata.org/wiki/Q272021","display_name":"Gestalt psychology","level":3,"score":0.8202036619186401},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7423381805419922},{"id":"https://openalex.org/C192328126","wikidata":"https://www.wikidata.org/wiki/Q4514647","display_name":"Schematic","level":2,"score":0.5096421241760254},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.506846010684967},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4384106695652008},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.42981332540512085},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32920071482658386},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2023/93","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/93","pdf_url":"https://www.ijcai.org/proceedings/2023/0093.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2023/93","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/93","pdf_url":"https://www.ijcai.org/proceedings/2023/0093.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8199999928474426}],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2369050462","display_name":null,"funder_award_id":"2022YFC3303600","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G2376276132","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G2597775472","display_name":null,"funder_award_id":"21013","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3250154588","display_name":null,"funder_award_id":"62106190","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4303215675","display_name":null,"funder_award_id":"62192781","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4896784468","display_name":null,"funder_award_id":"62137002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5122973651","display_name":null,"funder_award_id":"62250066","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5675968330","display_name":null,"funder_award_id":"42101210","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5749106617","display_name":null,"funder_award_id":"xhj032021013","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G5853313636","display_name":null,"funder_award_id":"Knowledge","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6311837223","display_name":null,"funder_award_id":"62293553","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6387986177","display_name":null,"funder_award_id":"61721002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7174558747","display_name":null,"funder_award_id":"Group","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7758118313","display_name":null,"funder_award_id":"61937001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8050651220","display_name":null,"funder_award_id":"202101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8246171347","display_name":null,"funder_award_id":"2021013","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8679641091","display_name":null,"funder_award_id":"6172100","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8764071032","display_name":null,"funder_award_id":"2022YFC","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8928869386","display_name":null,"funder_award_id":"xhj032021013-02","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321169","display_name":"Xi\u2019an Jiaotong University","ror":"https://ror.org/017zhmm22"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385767402.pdf"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W126405584","https://openalex.org/W639708223","https://openalex.org/W1965372883","https://openalex.org/W1969294188","https://openalex.org/W2040553845","https://openalex.org/W2194775991","https://openalex.org/W2787839673","https://openalex.org/W2884585870","https://openalex.org/W2896457183","https://openalex.org/W2904910963","https://openalex.org/W2946086442","https://openalex.org/W2949250467","https://openalex.org/W2951659295","https://openalex.org/W2962766617","https://openalex.org/W2963037989","https://openalex.org/W2964022527","https://openalex.org/W2986755220","https://openalex.org/W2986803748","https://openalex.org/W3019546484","https://openalex.org/W3034772468","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3110435696","https://openalex.org/W3138716934","https://openalex.org/W3171547673","https://openalex.org/W3200998783","https://openalex.org/W3207127495","https://openalex.org/W4225165483","https://openalex.org/W4283030109","https://openalex.org/W4285216485","https://openalex.org/W4287255256","https://openalex.org/W4313145013","https://openalex.org/W4382240596"],"related_works":["https://openalex.org/W3154683910","https://openalex.org/W1846734616","https://openalex.org/W2359532622","https://openalex.org/W1593175091","https://openalex.org/W3134543635","https://openalex.org/W2921318524","https://openalex.org/W3039810647","https://openalex.org/W2377571686","https://openalex.org/W4380988671","https://openalex.org/W2120175042"],"abstract_inverted_index":{"Diagram":[0],"visual":[1,46,55,69,92,101],"grounding":[2],"aims":[3],"to":[4,66,82,109,131,142,162],"capture":[5],"the":[6,15,23,51,67,73,84,96,112,116,133,144,156,164,167,182,189],"correlation":[7],"between":[8,53,135],"language":[9,88,138],"expression":[10],"and":[11,17,29,38,56,87,137,153,176],"local":[12],"objects":[13,86],"in":[14,22,43],"diagram,":[16],"plays":[18],"an":[19],"important":[20],"role":[21],"applications":[24],"like":[25],"textbook":[26],"question":[27],"answering":[28],"cross-modal":[30],"retrieval.":[31],"Most":[32],"diagrams":[33,136,175],"consist":[34],"of":[35,60,146,166],"several":[36],"colors":[37],"simple":[39],"geometries.":[40],"This":[41],"results":[42],"sparse":[44],"low-level":[45,54,91],"features,":[47,93,123],"which":[48],"further":[49],"aggravates":[50],"gap":[52],"high-level":[57,121],"semantic":[58,122],"features":[59,113,152],"diagrams.":[61,147],"The":[62],"phenomenon":[63],"brings":[64],"challenges":[65],"diagram":[68,85,151],"grounding.":[70],"To":[71],"solve":[72],"above":[74],"issues,":[75],"we":[76,103,124,179],"propose":[77],"a":[78,105,126],"gestalt-perceptual":[79],"attention":[80,129],"model":[81,184],"align":[83],"expressions.":[89],"For":[90,120],"inspired":[94],"by":[95,115,150],"gestalt":[97],"that":[98,181],"simulates":[99],"human":[100],"system,":[102],"build":[104],"gestalt-perception":[106],"graph":[107],"network":[108],"make":[110],"up":[111],"learned":[114],"traditional":[117],"backbone":[118],"network.":[119],"design":[125],"multi-modal":[127],"context":[128],"mechanism":[130],"facilitate":[132],"interaction":[134],"expressions,":[139],"so":[140],"as":[141],"enhance":[143],"semantics":[145],"Finally,":[148],"guided":[149],"linguistic":[154],"embedding,":[155],"target":[157],"query":[158],"is":[159],"gradually":[160],"decoded":[161],"generate":[163],"coordinates":[165],"referred":[168],"object.":[169],"By":[170],"conducting":[171],"comprehensive":[172],"experiments":[173],"on":[174],"natural":[177],"images,":[178],"demonstrate":[180],"proposed":[183],"achieves":[185],"superior":[186],"performance":[187],"over":[188],"competitors.":[190],"Our":[191],"code":[192],"will":[193],"be":[194],"released":[195],"at":[196],"https://github.com/AIProCode/GPA.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
