{"id":"https://openalex.org/W4200120588","doi":"https://doi.org/10.1109/ictc52510.2021.9620954","title":"High-level Image Classification by Synergizing Image Captioning with BERT","display_name":"High-level Image Classification by Synergizing Image Captioning with BERT","publication_year":2021,"publication_date":"2021-10-20","ids":{"openalex":"https://openalex.org/W4200120588","doi":"https://doi.org/10.1109/ictc52510.2021.9620954"},"language":"en","primary_location":{"id":"doi:10.1109/ictc52510.2021.9620954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc52510.2021.9620954","pdf_url":null,"source":{"id":"https://openalex.org/S4363607766","display_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022655938","display_name":"Xiaohong Yu","orcid":"https://orcid.org/0000-0001-8982-0328"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Xiaohong Yu","raw_affiliation_strings":["Department of Computer Science & Engineering, Sungkyunkwan University, Suwon, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, Sungkyunkwan University, Suwon, Republic of Korea","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048125898","display_name":"Yoseop Ahn","orcid":null},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yoseop Ahn","raw_affiliation_strings":["Department of Computer Science & Engineering, Sungkyunkwan University, Suwon, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, Sungkyunkwan University, Suwon, Republic of Korea","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050526299","display_name":"Jaehoon Jeong","orcid":"https://orcid.org/0000-0001-8490-758X"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaehoon Jeong","raw_affiliation_strings":["Department of Computer Science & Engineering, Sungkyunkwan University, Suwon, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Engineering, Sungkyunkwan University, Suwon, Republic of Korea","institution_ids":["https://openalex.org/I848706"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5022655938"],"corresponding_institution_ids":["https://openalex.org/I848706"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.11909562,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1686","last_page":"1690"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9461212158203125},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8165090084075928},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6646557450294495},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.5687325596809387},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5300255417823792},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4513194262981415},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4285096526145935},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4235735237598419},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41070666909217834},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3985598385334015},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.35678189992904663}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9461212158203125},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8165090084075928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6646557450294495},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.5687325596809387},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5300255417823792},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4513194262981415},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4285096526145935},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4235735237598419},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41070666909217834},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3985598385334015},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35678189992904663},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ictc52510.2021.9620954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc52510.2021.9620954","pdf_url":null,"source":{"id":"https://openalex.org/S4363607766","display_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G263811790","display_name":null,"funder_award_id":"2019-0-01343","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"}],"funders":[{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W2007339694","https://openalex.org/W2108598243","https://openalex.org/W2183341477","https://openalex.org/W2896457183","https://openalex.org/W2963758027","https://openalex.org/W2964018924","https://openalex.org/W2972006294","https://openalex.org/W3118608800"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W3088136942","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2803345361","https://openalex.org/W3009270862"],"abstract_inverted_index":{"Conventional":[0],"image":[1,13,90,95,108,163,185,205],"classification":[2,74,91,149,231],"methods":[3],"mostly":[4],"aim":[5,52],"to":[6,61,111,129,147,213],"classify":[7,130,143],"a":[8,20,85,124,169,203,209,225],"single":[9],"object":[10,17],"in":[11,14,25,76,80,228],"an":[12,16,107],"which":[15,114,166],"often":[18],"occupies":[19],"large":[21],"area.":[22],"However,":[23],"images":[24,55,145],"social":[26],"network":[27],"services":[28],"(SNS)":[29],"are":[30],"more":[31],"complicated.":[32],"They":[33],"usually":[34],"include":[35],"multiple":[36],"objects":[37],"that":[38,220],"have":[39,201],"much":[40],"information,":[41],"such":[42,63,77],"as":[43,64],"people,":[44],"environments,":[45],"and":[46,58,69,97,182],"actions.":[47],"In":[48,138],"this":[49,81,139],"work,":[50],"we":[51,83,105,122,141,167],"at":[53],"understanding":[54],"from":[56,135,197,207],"SNS":[57,211],"classifying":[59],"them":[60],"categories":[62],"fashion,":[65],"traveling,":[66],"education,":[67],"beauty,":[68],"animals.":[70],"To":[71],"improve":[72],"the":[73,94,98,119,131,136,144,148,152,176,184,191],"accuracy":[75],"complicated":[78],"scenario,":[79],"paper,":[82],"propose":[84],"new":[86],"framework":[87,157],"for":[88,180],"high-level":[89],"by":[92],"synergizing":[93],"captioning":[96,109,164],"Natural":[99],"Language":[100],"Processing":[101],"(NLP)":[102],"model.":[103],"First,":[104],"use":[106,123,168],"model":[110,128,174,179,189],"understand":[112],"images,":[113],"generates":[115],"text":[116,133,154],"descriptions":[117,134],"about":[118],"images.":[120,137],"Second,":[121],"natural":[125],"language":[126],"processing":[127],"generated":[132,153],"way,":[140],"can":[142],"according":[146],"results":[150,218],"of":[151,230],"descriptions.":[155],"Our":[156,217],"includes":[158],"two":[159],"models;":[160],"one":[161],"is":[162,190],"model,":[165,193],"TensorFlow":[170],"based":[171],"visual":[172],"attention":[173],"with":[175],"inception":[177],"V3":[178],"pre-processing":[181],"extracting":[183],"features.":[186],"The":[187],"other":[188],"NLP":[192],"Bidirectional":[194],"Encoder":[195],"Representations":[196],"Transformers":[198],"(BERT).":[199],"We":[200],"built":[202],"labeled":[204],"dataset":[206],"Instagram,":[208],"popular":[210],"platform,":[212],"test":[214],"our":[215,221],"framework.":[216],"show":[219],"proposed":[222],"method":[223],"has":[224],"promising":[226],"performance":[227],"terms":[229],"accuracy.":[232]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2025-10-10T00:00:00"}
