{"id":"https://openalex.org/W4385768038","doi":"https://doi.org/10.24963/ijcai.2023/481","title":"From Association to Generation: Text-only Captioning by Unsupervised Cross-modal Mapping","display_name":"From Association to Generation: Text-only Captioning by Unsupervised Cross-modal Mapping","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385768038","doi":"https://doi.org/10.24963/ijcai.2023/481"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2023/481","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/481","pdf_url":"https://www.ijcai.org/proceedings/2023/0481.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2023/0481.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100722015","display_name":"Junyang Wang","orcid":"https://orcid.org/0000-0001-8334-4009"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junyang Wang","raw_affiliation_strings":["School of Computer and Information Technology & Beijing Key Lab of Traffc Data Analysis and Mining, Beijing Jiaotong University","School of Computer and Information Technology & Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Information Technology & Beijing Key Lab of Traffc Data Analysis and Mining, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"School of Computer and Information Technology & Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100452477","display_name":"Ming Yan","orcid":"https://orcid.org/0000-0003-4959-8878"},"institutions":[{"id":"https://openalex.org/I4210086143","display_name":"Alibaba Group (Cayman Islands)","ror":"https://ror.org/00mnrxf72","country_code":"KY","type":"company","lineage":["https://openalex.org/I4210086143","https://openalex.org/I45928872"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN","KY"],"is_corresponding":false,"raw_author_name":"Ming Yan","raw_affiliation_strings":["DAMO Academy, Alibaba Group","Peng Cheng Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DAMO Academy, Alibaba Group","institution_ids":["https://openalex.org/I4210086143"]},{"raw_affiliation_string":"Peng Cheng Lab","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100388333","display_name":"Yi Zhang","orcid":"https://orcid.org/0000-0003-4299-1511"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zhang","raw_affiliation_strings":["School of Computer and Information Technology & Beijing Key Lab of Traffc Data Analysis and Mining, Beijing Jiaotong University","School of Computer and Information Technology & Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Information Technology & Beijing Key Lab of Traffc Data Analysis and Mining, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"School of Computer and Information Technology & Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023834030","display_name":"Jitao Sang","orcid":"https://orcid.org/0000-0002-0699-3205"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jitao Sang","raw_affiliation_strings":["Peng Cheng Lab","School of Computer and Information Technology & Beijing Key Lab of Traffc Data Analysis and Mining, Beijing Jiaotong University","School of Computer and Information Technology & Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peng Cheng Lab","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"School of Computer and Information Technology & Beijing Key Lab of Traffc Data Analysis and Mining, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"School of Computer and Information Technology & Beijing Key Lab of Traffic Data Analysis and Mining, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100722015"],"corresponding_institution_ids":["https://openalex.org/I21193070"],"apc_list":null,"apc_paid":null,"fwci":1.038,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.78927678,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4326","last_page":"4334"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9188563823699951},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8294662237167358},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6663028001785278},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6184728741645813},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4700906276702881},{"id":"https://openalex.org/keywords/association","display_name":"Association (psychology)","score":0.45533058047294617},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.44386667013168335},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4007956385612488},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3948816657066345},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.384502649307251},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3577239513397217},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3346589207649231}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9188563823699951},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8294662237167358},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6663028001785278},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6184728741645813},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4700906276702881},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.45533058047294617},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.44386667013168335},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4007956385612488},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3948816657066345},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.384502649307251},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3577239513397217},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3346589207649231},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2023/481","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/481","pdf_url":"https://www.ijcai.org/proceedings/2023/0481.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2023/481","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/481","pdf_url":"https://www.ijcai.org/proceedings/2023/0481.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5899999737739563,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G106125295","display_name":null,"funder_award_id":"No. 61832002, 62172094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1573135090","display_name":null,"funder_award_id":"61832002, 62172094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5219150502","display_name":null,"funder_award_id":"62172094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8434150956","display_name":null,"funder_award_id":"61832002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4385768038.pdf"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1861492603","https://openalex.org/W1889081078","https://openalex.org/W1956340063","https://openalex.org/W2108325777","https://openalex.org/W2122180654","https://openalex.org/W2133459682","https://openalex.org/W2425121537","https://openalex.org/W2481240925","https://openalex.org/W2506483933","https://openalex.org/W2550553598","https://openalex.org/W2625940279","https://openalex.org/W2745461083","https://openalex.org/W2896457183","https://openalex.org/W2966715458","https://openalex.org/W2970231061","https://openalex.org/W2990069284","https://openalex.org/W2990307191","https://openalex.org/W2997248215","https://openalex.org/W2997591391","https://openalex.org/W3091588028","https://openalex.org/W3100255860","https://openalex.org/W3126337491","https://openalex.org/W3166396011","https://openalex.org/W3173220247","https://openalex.org/W3174377922","https://openalex.org/W3181158454","https://openalex.org/W3217561355","https://openalex.org/W4206875171","https://openalex.org/W4224035735","https://openalex.org/W4225432580","https://openalex.org/W4226352076","https://openalex.org/W4288043238","https://openalex.org/W4307106676","https://openalex.org/W4309134658","https://openalex.org/W4312563428","https://openalex.org/W4312922092","https://openalex.org/W4312924260","https://openalex.org/W4320339901","https://openalex.org/W4385567053"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2110287964","https://openalex.org/W2167701463","https://openalex.org/W4307407935"],"abstract_inverted_index":{"With":[0,144],"the":[1,30,50,77,84,90,98,101,109,117,124,132],"development":[2],"of":[3,33,52,80,86,92,100],"Vision-Language":[4],"Pre-training":[5],"Models":[6],"(VLPMs)":[7],"represented":[8],"by":[9,29],"CLIP":[10,34,38,68,78,87],"and":[11,26,55,83,120,158],"ALIGN,":[12],"significant":[13],"breakthroughs":[14],"have":[15,63],"been":[16],"achieved":[17],"for":[18,58,67,155],"association-based":[19],"visual":[20],"tasks":[21,57],"such":[22],"as":[23],"image":[24,156],"classification":[25],"image-text":[27],"retrieval":[28],"zero-shot":[31,138,153],"capability":[32],"without":[35],"fine-tuning.":[36],"However,":[37],"is":[39,47],"hard":[40],"to":[41,43,49,88,103,113,116,142],"apply":[42],"generation-based":[44],"tasks.":[45],"This":[46],"due":[48],"lack":[51],"decoder":[53],"architecture":[54],"pre-training":[56],"generation.":[59,143],"Although":[60],"previous":[61],"works":[62],"created":[64],"generation":[65],"capacity":[66],"through":[69],"additional":[70],"language":[71,118,125],"models,":[72],"a":[73,137],"modality":[74,119],"gap":[75],"between":[76],"representations":[79],"different":[81],"modalities":[82],"inability":[85],"model":[89],"offset":[91],"this":[93,128],"gap,":[94],"which":[95],"results":[96],"in":[97,152],"failure":[99],"concept":[102],"transfer":[104],"across":[105],"modes.":[106],"To":[107],"solve":[108],"problem,":[110],"we":[111,130],"try":[112],"map":[114],"images/videos":[115],"generate":[121],"captions":[122],"from":[123,140],"modality.":[126],"In":[127],"paper,":[129],"propose":[131],"K-nearest-neighbor":[133],"Cross-modality":[134],"Mapping":[135],"(Knight),":[136],"method":[139],"association":[141],"vision-free":[145],"unsupervised":[146],"training,":[147],"Knight":[148],"achieves":[149],"state-of-the-art":[150],"performance":[151],"methods":[154],"captioning":[157],"video":[159],"captioning.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
