{"id":"https://openalex.org/W7129486080","doi":"https://doi.org/10.1109/icipw68931.2025.11385896","title":"A Vision-Language Model-Based Scene Graph Generation Method With Directional Relationship Reasoning","display_name":"A Vision-Language Model-Based Scene Graph Generation Method With Directional Relationship Reasoning","publication_year":2025,"publication_date":"2025-09-14","ids":{"openalex":"https://openalex.org/W7129486080","doi":"https://doi.org/10.1109/icipw68931.2025.11385896"},"language":null,"primary_location":{"id":"doi:10.1109/icipw68931.2025.11385896","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icipw68931.2025.11385896","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Image Processing Workshops (ICIPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033431243","display_name":"Jinyeong Oh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210161052","display_name":"Korea University","ror":"https://ror.org/05m1gnk07","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210161052"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Jinyeong Oh","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Republic of Korea","institution_ids":["https://openalex.org/I4210161052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087273355","display_name":"Eunbeen Kim","orcid":"https://orcid.org/0000-0001-5535-0655"},"institutions":[{"id":"https://openalex.org/I4210161052","display_name":"Korea University","ror":"https://ror.org/05m1gnk07","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210161052"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Eunbeen Kim","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Republic of Korea","institution_ids":["https://openalex.org/I4210161052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066467394","display_name":"Jonghwa Shim","orcid":"https://orcid.org/0000-0001-9738-1038"},"institutions":[{"id":"https://openalex.org/I4210161052","display_name":"Korea University","ror":"https://ror.org/05m1gnk07","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210161052"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jonghwa Shim","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Republic of Korea","institution_ids":["https://openalex.org/I4210161052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071337745","display_name":"Eenjun Hwang","orcid":"https://orcid.org/0000-0002-0418-4092"},"institutions":[{"id":"https://openalex.org/I4210161052","display_name":"Korea University","ror":"https://ror.org/05m1gnk07","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210161052"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Eenjun Hwang","raw_affiliation_strings":["Korea University,School of Electrical Engineering,Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,School of Electrical Engineering,Republic of Korea","institution_ids":["https://openalex.org/I4210161052"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5033431243"],"corresponding_institution_ids":["https://openalex.org/I4210161052"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.70690933,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"398","last_page":"403"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.002300000051036477,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.002099999925121665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7670999765396118},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6011000275611877},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.541100025177002},{"id":"https://openalex.org/keywords/predicate","display_name":"Predicate (mathematical logic)","score":0.5218999981880188},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4880000054836273},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.4684000015258789},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44839999079704285},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.3643999993801117}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7670999765396118},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6920999884605408},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6894000172615051},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6011000275611877},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.541100025177002},{"id":"https://openalex.org/C140146324","wikidata":"https://www.wikidata.org/wiki/Q1144319","display_name":"Predicate (mathematical logic)","level":2,"score":0.5218999981880188},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4880000054836273},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.4684000015258789},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44839999079704285},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3833000063896179},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3684000074863434},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.3643999993801117},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.32820001244544983},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C159246509","wikidata":"https://www.wikidata.org/wiki/Q5428725","display_name":"Factor graph","level":3,"score":0.3061999976634979},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29019999504089355},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.289900004863739},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C88230418","wikidata":"https://www.wikidata.org/wiki/Q131476","display_name":"Graph theory","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C3746660","wikidata":"https://www.wikidata.org/wiki/Q1068763","display_name":"Rule of inference","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icipw68931.2025.11385896","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icipw68931.2025.11385896","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Image Processing Workshops (ICIPW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2277195237","https://openalex.org/W2963536419","https://openalex.org/W2963938081","https://openalex.org/W3034538190","https://openalex.org/W3096609285","https://openalex.org/W3138516171","https://openalex.org/W3168955054","https://openalex.org/W4214942454","https://openalex.org/W4312956471","https://openalex.org/W4366352717","https://openalex.org/W4385245566","https://openalex.org/W4386071767","https://openalex.org/W4388816994","https://openalex.org/W4402715983","https://openalex.org/W4402727472","https://openalex.org/W4402727672","https://openalex.org/W4402754270","https://openalex.org/W4404612908","https://openalex.org/W4404792503","https://openalex.org/W4415795196"],"related_works":[],"abstract_inverted_index":{"Scene":[0],"Graph":[1],"Generation":[2],"(SGG)":[3],"generates":[4,98],"graphs":[5],"from":[6,72],"visual":[7],"scenes":[8],"in":[9],"the":[10,40,80,125,129],"form":[11],"of":[12],"{subject-predicate-object}":[13],"triplets.":[14],"Traditional":[15],"SGG":[16,49],"methods":[17,127],"rely":[18],"on":[19,128],"fully":[20],"supervised":[21],"training,":[22],"requiring":[23],"annotated":[24],"datasets":[25],"for":[26,39],"new":[27],"objects":[28],"and":[29,86,89,102,110,133,141],"predicates.":[30],"They":[31],"also":[32],"ignore":[33],"subject-object":[34,108],"directionality,":[35],"generating":[36],"inappropriate":[37],"predicates":[38,104],"subject.":[41,95],"In":[42],"this":[43],"paper,":[44],"we":[45],"propose":[46],"a":[47,53,59],"novel":[48],"method":[50,118],"that":[51,137],"combines":[52],"pretrained":[54],"vision-language":[55],"object":[56,67,81,87,92,142],"detector":[57,65],"with":[58,113],"lightweight":[60],"relationship":[61,100],"inference":[62,77],"module.":[63],"The":[64,76,116],"performs":[66],"detection":[68],"by":[69,105,121],"leveraging":[70],"knowledge":[71],"extensive":[73],"image-text":[74],"pairs.":[75],"module":[78],"projects":[79],"representations":[82,101],"into":[83],"distinct":[84],"subject":[85,140],"embeddings":[88],"selects":[90],"relevant":[91],"candidates":[93],"per":[94],"It":[96],"then":[97],"directional":[99],"predicts":[103],"selecting":[106],"appropriate":[107],"pairs":[109],"comparing":[111],"them":[112],"predicate":[114],"embeddings.":[115],"proposed":[117],"improves":[119],"mR@20":[120],"about":[122],"14%":[123],"over":[124],"state-of-the-art":[126],"Visual":[130],"Genome":[131],"dataset":[132],"shows":[134],"qualitative":[135],"results":[136],"better":[138],"distinguish":[139],"roles.":[143]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-02-18T00:00:00"}
