{"id":"https://openalex.org/W3197545550","doi":"https://doi.org/10.1145/3474085.3475648","title":"Knowledge Perceived Multi-modal Pretraining in E-commerce","display_name":"Knowledge Perceived Multi-modal Pretraining in E-commerce","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3197545550","doi":"https://doi.org/10.1145/3474085.3475648","mag":"3197545550"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2109.00895","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yushan Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yushan Zhu","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Huaixiao Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaixiao Zhao","raw_affiliation_strings":["Alibaba Group, Hangzhou, Zhejiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wen Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Zhang","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ganqiang Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ganqiang Ye","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hui Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Chen","raw_affiliation_strings":["Alibaba Group, Hangzhou, Zhejiang, China","Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I45928872"]},{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ningyu Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ningyu Zhang","raw_affiliation_strings":["Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":null,"display_name":"Huajun Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huajun Chen","raw_affiliation_strings":["Alibaba Group, Hangzhou, Zhejiang, China","Zhejiang University, Hangzhou, Zhejiang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I45928872"]},{"raw_affiliation_string":"Zhejiang University, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2613,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.82064805,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2744","last_page":"2752"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6470999717712402},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.47780001163482666},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.41290000081062317},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4088999927043915},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4004000127315521},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.39809998869895935},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.3885999917984009},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.3880999982357025},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.38029998540878296}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6942999958992004},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6470999717712402},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6326000094413757},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.47780001163482666},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.41290000081062317},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4088999927043915},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4004000127315521},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.39809998869895935},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3935000002384186},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3885999917984009},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.3880999982357025},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.38029998540878296},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3612000048160553},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3571999967098236},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.34200000762939453},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.33820000290870667},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33570000529289246},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2888000011444092},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.2782000005245209},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27070000767707825},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3474085.3475648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2109.00895","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2109.00895","pdf_url":"https://arxiv.org/pdf/2109.00895","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2109.00895","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2109.00895","pdf_url":"https://arxiv.org/pdf/2109.00895","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G613940918","display_name":null,"funder_award_id":"C91846204/U19B2027","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W2183341477","https://openalex.org/W2549139847","https://openalex.org/W2893689064","https://openalex.org/W2896457183","https://openalex.org/W2919203733","https://openalex.org/W2946241506","https://openalex.org/W2951105272","https://openalex.org/W2953356739","https://openalex.org/W2964859466","https://openalex.org/W2968124245","https://openalex.org/W2969876226","https://openalex.org/W2970231061","https://openalex.org/W2970869018","https://openalex.org/W2970986510","https://openalex.org/W2981851019","https://openalex.org/W2997591391","https://openalex.org/W2998356391","https://openalex.org/W2998385486","https://openalex.org/W3023567515","https://openalex.org/W3030163527","https://openalex.org/W3035485997","https://openalex.org/W3035652667","https://openalex.org/W3080565611","https://openalex.org/W3090449556","https://openalex.org/W3097616280","https://openalex.org/W3151929433","https://openalex.org/W3156359583","https://openalex.org/W3175892112","https://openalex.org/W3177331119","https://openalex.org/W6745537798"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1,48],"paper,":[2],"we":[3,50],"address":[4],"multi-modal":[5,16,40,61],"pretraining":[6,17,62,138],"of":[7,13,30,39,71,82,90,95,110,127,164],"product":[8,41],"data":[9,42],"in":[10,27,43,60,174],"the":[11,28,65,69,80,93,108,125,177],"field":[12],"E-commerce.":[14],"Current":[15],"methods":[18,181],"proposed":[19],"for":[20],"image":[21,72,111],"and":[22,32,67,73,113,116,130,149,161,179],"text":[23,74,114],"modalities":[24],"lack":[25],"robustness":[26],"face":[29],"modality-missing":[31,185],"modality-noise,":[33],"which":[34,56],"are":[35],"two":[36],"pervasive":[37],"problems":[38],"real":[44],"E-commerce":[45,159],"scenarios.":[46],"To":[47],"end,":[49],"propose":[51],"a":[52,117,157,162],"novel":[53],"method,":[54],"K3M,":[55],"introduces":[57],"knowledge":[58,131],"modality":[59,112],"to":[63,106,123],"correct":[64],"noise":[66],"supplement":[68],"missing":[70],"modalities.":[75,132],"The":[76,85],"modal-encoding":[77],"layer":[78,87],"extracts":[79],"features":[81],"each":[83],"modality.":[84],"modal-interaction":[86],"is":[88,104,121],"capable":[89],"effectively":[91],"modeling":[92,143,147,152],"interaction":[94],"multiple":[96],"modalities,":[97],"where":[98],"an":[99],"initial-interactive":[100],"feature":[101],"fusion":[102],"model":[103],"designed":[105,122],"maintain":[107],"independence":[109],"modality,":[115],"structure":[118],"aggregation":[119],"module":[120],"fuse":[124],"information":[126],"image,":[128],"text,":[129],"We":[133],"pretrain":[134],"K3M":[135,170],"with":[136],"three":[137],"tasks,":[139],"including":[140],"masked":[141,145],"object":[142],"(MOM),":[144],"language":[146],"(MLM),":[148],"link":[150],"prediction":[151],"(LPM).":[153],"Experimental":[154],"results":[155],"on":[156],"real-world":[158],"dataset":[160],"series":[163],"product-based":[165],"downstream":[166],"tasks":[167],"demonstrate":[168],"that":[169],"achieves":[171],"significant":[172],"improvements":[173],"performances":[175],"than":[176],"baseline":[178],"state-of-the-art":[180],"when":[182],"modality-noise":[183],"or":[184],"exists.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2021-09-13T00:00:00"}
