{"id":"https://openalex.org/W4410770232","doi":"https://doi.org/10.1109/icrca64997.2025.11011051","title":"Multi-Scale Feature Fusion Based on Improved YOLOv8 Networks for Visual Relationship Detection","display_name":"Multi-Scale Feature Fusion Based on Improved YOLOv8 Networks for Visual Relationship Detection","publication_year":2025,"publication_date":"2025-03-07","ids":{"openalex":"https://openalex.org/W4410770232","doi":"https://doi.org/10.1109/icrca64997.2025.11011051"},"language":"en","primary_location":{"id":"doi:10.1109/icrca64997.2025.11011051","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icrca64997.2025.11011051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 9th International Conference on Robotics, Control and Automation (ICRCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100725686","display_name":"Yixuan Chen","orcid":"https://orcid.org/0009-0005-5915-3743"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yixuan Chen","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101073611","display_name":"Wensheng Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wensheng Sun","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Information and Communication Engineering,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100725686"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":1.319,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.80969617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"371","last_page":"375"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9650999903678894,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9650999903678894,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":0.9212999939918518,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7088762521743774},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6831111311912537},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5877742171287537},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5851370096206665},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5572260618209839},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5088789463043213},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.45063158869743347},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4324500262737274},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.05397334694862366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7088762521743774},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6831111311912537},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5877742171287537},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5851370096206665},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5572260618209839},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5088789463043213},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.45063158869743347},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4324500262737274},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.05397334694862366},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icrca64997.2025.11011051","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icrca64997.2025.11011051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 9th International Conference on Robotics, Control and Automation (ICRCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2963037989","https://openalex.org/W2976260559","https://openalex.org/W2998506933","https://openalex.org/W4210580326","https://openalex.org/W4320481420","https://openalex.org/W4388188285","https://openalex.org/W4400986275","https://openalex.org/W4401517872","https://openalex.org/W4401611699","https://openalex.org/W4402480081"],"related_works":["https://openalex.org/W2601157893","https://openalex.org/W2373006798","https://openalex.org/W2131735617","https://openalex.org/W2056912418","https://openalex.org/W2033213769","https://openalex.org/W4312376745","https://openalex.org/W2136016640","https://openalex.org/W2049538278","https://openalex.org/W2886173746","https://openalex.org/W4200043248"],"abstract_inverted_index":{"Despite":[0],"its":[1],"high":[2],"efficiency":[3],"in":[4,10,19,58,132,143,149,161],"real-time":[5],"detection,":[6],"YOLOv8":[7,46,160],"faces":[8],"limitations":[9],"dense":[11],"scenes":[12],"with":[13,67,139],"many":[14],"overlapping":[15],"objects.":[16,91],"Its":[17],"deficiencies":[18],"multi-scale":[20,104],"feature":[21,25],"handling":[22],"and":[23,54,73,84,106,145,159],"deep":[24],"extraction":[26],"affect":[27],"the":[28,68,82,107,115,125],"recognition":[29],"of":[30],"subtle":[31],"scene":[32],"differences.":[33],"This":[34],"paper":[35],"proposes":[36],"RelYOLOv8,":[37],"an":[38],"enhanced":[39],"visual":[40,55],"relation":[41,56],"detection":[42,52,86,133,151],"model":[43],"based":[44],"on":[45,124],"network,":[47],"aiming":[48],"to":[49,77,101,113,118,137],"improve":[50],"object":[51],"accuracy":[53,134,144],"reasoning":[57],"complex":[59,162],"scenes.":[60],"It":[61],"replaces":[62],"YOLOv8's":[63],"original":[64],"C2f":[65],"module":[66,98],"C2f_Lite":[69],"module,":[70],"combining":[71],"standard":[72],"depthwise":[74],"separable":[75],"convolution":[76],"optimize":[78],"computational":[79],"efficiency,":[80],"reduce":[81],"load,":[83],"enhance":[85],"performance,":[87],"especially":[88],"for":[89],"small":[90],"The":[92],"Atrous":[93],"Spatial":[94],"Pyramid":[95],"Pooling":[96],"(ASPP)":[97],"is":[99,111],"added":[100],"better":[102],"handle":[103],"features,":[105],"Slide":[108],"Loss":[109],"function":[110],"integrated":[112],"strengthen":[114],"model's":[116],"ability":[117],"distinguish":[119],"difficult":[120],"samples.":[121],"Experimental":[122],"results":[123],"VRD":[126],"dataset":[127],"show":[128],"a":[129,140,146],"marked":[130],"increase":[131],"from":[135],"0.716":[136],"0.861,":[138],"14.5%":[141],"improvement":[142],"14.2%":[147],"reduction":[148],"missed":[150],"rates.":[152],"Moreover,":[153],"RelYOLOv8":[154],"clearly":[155],"outperforms":[156],"Faster":[157],"R-CNN":[158],"scenarios.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
