{"id":"https://openalex.org/W7118390845","doi":"https://doi.org/10.1109/tmm.2026.3651039","title":"ASR-Enhanced Multimodal Representation Learning for Cross-Domain Product Retrieval","display_name":"ASR-Enhanced Multimodal Representation Learning for Cross-Domain Product Retrieval","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7118390845","doi":"https://doi.org/10.1109/tmm.2026.3651039"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2026.3651039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2026.3651039","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122219279","display_name":"Ruixiang Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruixiang Zhao","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-9984-1841","affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080008403","display_name":"Jian Jia","orcid":"https://orcid.org/0000-0002-7888-0699"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Jia","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122247463","display_name":"Yan Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Li","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101233327","display_name":"Xuehan Bai","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuehan Bai","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115602168","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0002-4865-2396"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020615530","display_name":"Han Li","orcid":"https://orcid.org/0009-0000-9801-9292"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Han Li","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122028996","display_name":"Peng Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Jiang","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060270456","display_name":"Xirong Li","orcid":"https://orcid.org/0000-0002-0220-8310"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xirong Li","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0220-8310","affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5122219279"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05334646,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"2618","last_page":"2629"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.2865999937057495,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.2865999937057495,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.20669999718666077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.19670000672340393,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.7304999828338623},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5760999917984009},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.5285000205039978},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4724999964237213},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4198000133037567},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3797999918460846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8526999950408936},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.7304999828338623},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6197999715805054},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5760999917984009},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.5285000205039978},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4724999964237213},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46810001134872437},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4198000133037567},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32589998841285706},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2808000147342682},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2694000005722046},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2026.3651039","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2026.3651039","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5818443620","display_name":null,"funder_award_id":"62576348","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6518577882","display_name":null,"funder_award_id":"62172420","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"E-commerce":[0],"is":[1,28,46,61,75,128],"increasingly":[2],"<italic":[3],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[4,81,84,87,90,92,95,98,112,152],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">multimedia</i>-enriched,":[5],"with":[6,125],"products":[7],"exhibited":[8],"in":[9,39,154],"a":[10,43,132,143,156],"broad-domain":[11,41],"manner":[12],"as":[13],"images,":[14],"short":[15,57],"videos,":[16],"or":[17,58],"live":[18],"stream":[19],"promotions.":[20],"A":[21],"unified":[22,157],"and":[23,35],"vectorized":[24],"cross-domain":[25,164],"production":[26],"representation":[27,45,73,160],"essential.":[29],"Due":[30],"to":[31,65,102,135],"large":[32],"intra-product":[33],"variance":[34],"high":[36],"inter-product":[37],"similarity":[38],"the":[40,56,67,107,148],"scenario,":[42],"visual-only":[44],"inadequate.":[47],"While":[48],"Automatic":[49],"Speech":[50],"Recognition":[51],"(ASR)":[52],"text":[53,70,119],"derived":[54],"from":[55,106],"live-stream":[59],"videos":[60],"readily":[62],"accessible,":[63],"how":[64],"de-noise":[66],"excessively":[68],"noisy":[69],"for":[71],"multimodal":[72,138,158],"learning":[74],"mostly":[76],"untouched.":[77],"We":[78],"propose":[79],"<underline":[80,83,86],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">A</u>SR-enhanced":[82],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">M</u>ultimodal":[85],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">P</u>roduct":[88],"R<underline":[89],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">e</u>p<underline":[91],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">r</u>esentation":[93],"L<underline":[94],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">e</u>arning":[96],"(<monospace":[97],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AMPere</monospace>).":[99],"In":[100],"order":[101],"extract":[103],"product-specific":[104],"information":[105],"raw":[108],"ASR":[109,118],"text,":[110,123],"<monospace":[111,151],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AMPere</monospace>":[113,153],"uses":[114],"an":[115],"easy-to-implement":[116],"LLM-based":[117],"summarizer.":[120],"The":[121],"LLM-summarized":[122],"together":[124],"visual":[126],"data,":[127],"then":[129],"fed":[130],"into":[131],"multi-branch":[133],"network":[134],"generate":[136],"compact":[137],"embeddings.":[139],"Extensive":[140],"experiments":[141],"on":[142],"large-scale":[144],"tri-domain":[145],"dataset":[146],"verify":[147],"effectiveness":[149],"of":[150],"obtaining":[155],"product":[159,165],"that":[161],"clearly":[162],"improves":[163],"retrieval.":[166]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-01-08T00:00:00"}
