{"id":"https://openalex.org/W4387846954","doi":"https://doi.org/10.1145/3583780.3614661","title":"Build Faster with Less: A Journey to Accelerate Sparse Model Building for Semantic Matching in Product Search","display_name":"Build Faster with Less: A Journey to Accelerate Sparse Model Building for Semantic Matching in Product Search","publication_year":2023,"publication_date":"2023-10-21","ids":{"openalex":"https://openalex.org/W4387846954","doi":"https://doi.org/10.1145/3583780.3614661"},"language":"en","primary_location":{"id":"doi:10.1145/3583780.3614661","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3583780.3614661","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614661","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614661","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101969233","display_name":"Jiong Zhang","orcid":"https://orcid.org/0000-0003-3192-3281"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jiong Zhang","raw_affiliation_strings":["Amazon, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018908998","display_name":"Yau-Shian Wang","orcid":"https://orcid.org/0009-0003-3240-7710"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yau-Shian Wang","raw_affiliation_strings":["Amazon, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006559148","display_name":"Wei-Cheng Chang","orcid":"https://orcid.org/0000-0002-5646-9356"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei-Cheng Chang","raw_affiliation_strings":["Amazon, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102865418","display_name":"Wei Li","orcid":"https://orcid.org/0009-0005-3967-3215"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["Amazon, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048749901","display_name":"Jyun\u2010Yu Jiang","orcid":"https://orcid.org/0000-0002-1753-8099"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jyun-Yu Jiang","raw_affiliation_strings":["Amazon, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010841999","display_name":"Cho\u2010Jui Hsieh","orcid":"https://orcid.org/0000-0002-3520-9627"},"institutions":[{"id":"https://openalex.org/I2799798094","display_name":"UCLA Health","ror":"https://ror.org/01d88se56","country_code":"US","type":"funder","lineage":["https://openalex.org/I2799798094"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cho-Jui Hsieh","raw_affiliation_strings":["UCLA, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"UCLA, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I2799798094"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023183059","display_name":"Hsiang\u2010Fu Yu","orcid":"https://orcid.org/0000-0001-5235-2962"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hsiang-Fu Yu","raw_affiliation_strings":["Amazon, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101969233"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":0.3491,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66151519,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"4960","last_page":"4966"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.829094409942627},{"id":"https://openalex.org/keywords/semantic-matching","display_name":"Semantic matching","score":0.6538408398628235},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5803520679473877},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5258539319038391},{"id":"https://openalex.org/keywords/semantic-data-model","display_name":"Semantic data model","score":0.46313217282295227},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4614966809749603},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4477618932723999},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.426156222820282},{"id":"https://openalex.org/keywords/pattern-matching","display_name":"Pattern matching","score":0.4118930995464325},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35608819127082825},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3409397602081299}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.829094409942627},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.6538408398628235},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5803520679473877},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5258539319038391},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.46313217282295227},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4614966809749603},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4477618932723999},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.426156222820282},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.4118930995464325},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35608819127082825},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3409397602081299},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3583780.3614661","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3583780.3614661","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614661","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3583780.3614661","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3583780.3614661","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614661","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387846954.pdf","grobid_xml":"https://content.openalex.org/works/W4387846954.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W1585610988","https://openalex.org/W2134089414","https://openalex.org/W2149381887","https://openalex.org/W2512971201","https://openalex.org/W2788125153","https://openalex.org/W2951534261","https://openalex.org/W2963469388","https://openalex.org/W2964369530","https://openalex.org/W2998702515","https://openalex.org/W2999508688","https://openalex.org/W2999905431","https://openalex.org/W3001645704","https://openalex.org/W3003257820","https://openalex.org/W3023045848","https://openalex.org/W3037422790","https://openalex.org/W3045268955","https://openalex.org/W3081146346","https://openalex.org/W3102124616","https://openalex.org/W3157393048","https://openalex.org/W3172352177","https://openalex.org/W3177232285","https://openalex.org/W3201691278","https://openalex.org/W3211566171","https://openalex.org/W4232006005","https://openalex.org/W4251326898","https://openalex.org/W4252076394","https://openalex.org/W4284670079","https://openalex.org/W4290877723","https://openalex.org/W6602019454"],"related_works":["https://openalex.org/W3008625068","https://openalex.org/W3128807919","https://openalex.org/W3176411177","https://openalex.org/W3035501883","https://openalex.org/W2373133917","https://openalex.org/W2903843515","https://openalex.org/W125027177","https://openalex.org/W4380343153","https://openalex.org/W67953000","https://openalex.org/W2387181381"],"abstract_inverted_index":{"The":[0],"semantic":[1,40,96,123],"matching":[2,41,124],"problem":[3,80],"in":[4,35,115],"product":[5],"search":[6],"seeks":[7],"to":[8,46,72,82,139],"retrieve":[9],"all":[10],"semantically":[11],"relevant":[12],"products":[13],"given":[14],"a":[15,116,121],"user":[16],"query.":[17],"Recent":[18],"studies":[19],"have":[20],"shown":[21],"that":[22],"extreme":[23],"multi-label":[24],"classification~(XMC)":[25],"model":[26,58,75,97,112],"enjoys":[27],"both":[28],"low":[29],"inference":[30],"latency":[31],"and":[32,51,93,109],"high":[33],"recall":[34],"real-world":[36,122],"scenarios.":[37],"These":[38],"XMC":[39,118],"models":[42],"adopt":[43],"TF-IDF":[44],"vectorizers":[45],"extract":[47],"query":[48],"text":[49],"features":[50],"use":[52],"mainly":[53],"sparse":[54,68],"matrices":[55],"for":[56,65],"the":[57,79,95,111],"weights.":[59],"However,":[60],"limited":[61],"availability":[62],"of":[63,84,86],"libraries":[64],"efficient":[66],"parallel":[67],"modules":[69],"may":[70],"lead":[71],"tediously":[73],"long":[74],"building":[76,113],"time":[77],"when":[78],"scales":[81],"hundreds":[83],"millions":[85],"labels.":[87],"This":[88],"incurs":[89],"significant":[90],"hardware":[91,144],"cost":[92,145],"renders":[94],"stale":[98],"even":[99],"before":[100],"it":[101],"is":[102],"deployed.":[103],"In":[104],"this":[105],"paper,":[106],"we":[107],"investigate":[108],"accelerate":[110],"procedures":[114],"tree-based":[117],"model.":[119],"On":[120],"task":[125],"with":[126],"100M":[127],"labels,":[128],"our":[129],"enhancements":[130],"achieve":[131],"over":[132],"10":[133],"times":[134],"acceleration":[135],"(from":[136],"3.1":[137],"days":[138],"6.7":[140],"hours)":[141],"while":[142],"reducing":[143],"by":[146],"25%.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
