{"id":"https://openalex.org/W4366551603","doi":"https://doi.org/10.1145/3539618.3591831","title":"Integrity and Junkiness Failure Handling for Embedding-based Retrieval: A Case Study in Social Network Search","display_name":"Integrity and Junkiness Failure Handling for Embedding-based Retrieval: A Case Study in Social Network Search","publication_year":2023,"publication_date":"2023-07-18","ids":{"openalex":"https://openalex.org/W4366551603","doi":"https://doi.org/10.1145/3539618.3591831"},"language":"en","primary_location":{"id":"doi:10.1145/3539618.3591831","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539618.3591831","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3539618.3591831","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3539618.3591831","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023352184","display_name":"Wenping Wang","orcid":"https://orcid.org/0000-0001-9665-3783"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wenping Wang","raw_affiliation_strings":["Meta Platforms, Inc, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0000-0001-9665-3783","affiliations":[{"raw_affiliation_string":"Meta Platforms, Inc, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009901847","display_name":"Yunxi Guo","orcid":"https://orcid.org/0000-0003-0316-5735"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yunxi Guo","raw_affiliation_strings":["Meta Platforms, Inc, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-0316-5735","affiliations":[{"raw_affiliation_string":"Meta Platforms, Inc, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044348647","display_name":"Chiyao Shen","orcid":"https://orcid.org/0009-0009-5104-3020"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chiyao Shen","raw_affiliation_strings":["Meta Platforms, Inc, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0009-0009-5104-3020","affiliations":[{"raw_affiliation_string":"Meta Platforms, Inc, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078621623","display_name":"Shuai Ding","orcid":"https://orcid.org/0009-0001-2744-3018"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuai Ding","raw_affiliation_strings":["Meta Platforms, Inc, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0009-0001-2744-3018","affiliations":[{"raw_affiliation_string":"Meta Platforms, Inc, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057491916","display_name":"Guangdeng Liao","orcid":"https://orcid.org/0009-0008-9724-6993"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangdeng Liao","raw_affiliation_strings":["Meta Platforms, Inc, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0009-0008-9724-6993","affiliations":[{"raw_affiliation_string":"Meta Platforms, Inc, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033189459","display_name":"Hao Fu","orcid":"https://orcid.org/0000-0002-8003-0212"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Fu","raw_affiliation_strings":["Meta Platforms, Inc, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-8003-0212","affiliations":[{"raw_affiliation_string":"Meta Platforms, Inc, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021860254","display_name":"Pramodh Karanth Prabhakar","orcid":"https://orcid.org/0009-0001-3881-788X"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pramodh Karanth Prabhakar","raw_affiliation_strings":["Meta Platforms, Inc, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0009-0001-3881-788X","affiliations":[{"raw_affiliation_string":"Meta Platforms, Inc, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5023352184"],"corresponding_institution_ids":["https://openalex.org/I4210114444"],"apc_list":null,"apc_paid":null,"fwci":1.1929,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.82507584,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3250","last_page":"3254"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8142467737197876},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.68194979429245},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5840293169021606},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5797947645187378},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5520131587982178},{"id":"https://openalex.org/keywords/offensive","display_name":"Offensive","score":0.544072151184082},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.5411564111709595},{"id":"https://openalex.org/keywords/rss","display_name":"RSS","score":0.539023756980896},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5276008248329163},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5174884796142578},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4901195466518402},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.48355504870414734},{"id":"https://openalex.org/keywords/semantic-search","display_name":"Semantic search","score":0.47283798456192017},{"id":"https://openalex.org/keywords/social-network","display_name":"Social network (sociolinguistics)","score":0.4268971085548401},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4140487611293793},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.38070183992385864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35543093085289},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.33984625339508057},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.26786351203918457},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2231319546699524},{"id":"https://openalex.org/keywords/operations-research","display_name":"Operations research","score":0.09530365467071533}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8142467737197876},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.68194979429245},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5840293169021606},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5797947645187378},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5520131587982178},{"id":"https://openalex.org/C176856949","wikidata":"https://www.wikidata.org/wiki/Q2001676","display_name":"Offensive","level":2,"score":0.544072151184082},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.5411564111709595},{"id":"https://openalex.org/C2385561","wikidata":"https://www.wikidata.org/wiki/Q45432","display_name":"RSS","level":2,"score":0.539023756980896},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5276008248329163},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5174884796142578},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4901195466518402},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.48355504870414734},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.47283798456192017},{"id":"https://openalex.org/C4727928","wikidata":"https://www.wikidata.org/wiki/Q17164759","display_name":"Social network (sociolinguistics)","level":3,"score":0.4268971085548401},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4140487611293793},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.38070183992385864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35543093085289},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.33984625339508057},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.26786351203918457},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2231319546699524},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.09530365467071533},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3539618.3591831","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539618.3591831","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3539618.3591831","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2304.09287","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2304.09287","pdf_url":"https://arxiv.org/pdf/2304.09287","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3539618.3591831","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539618.3591831","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3539618.3591831","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5600000023841858,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4366551603.pdf","grobid_xml":"https://content.openalex.org/works/W4366551603.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W2964369530","https://openalex.org/W3034969702","https://openalex.org/W3035447285","https://openalex.org/W3036320503","https://openalex.org/W3098468692","https://openalex.org/W3153182568","https://openalex.org/W3164446335","https://openalex.org/W3166125679","https://openalex.org/W3167329294","https://openalex.org/W3172750682","https://openalex.org/W4287117245","https://openalex.org/W4288359825"],"related_works":["https://openalex.org/W2359166167","https://openalex.org/W3590553","https://openalex.org/W4297963434","https://openalex.org/W3110844189","https://openalex.org/W2098875573","https://openalex.org/W2153030345","https://openalex.org/W1976839151","https://openalex.org/W2336826532","https://openalex.org/W3040185272","https://openalex.org/W2373953901"],"abstract_inverted_index":{"Embedding":[0],"based":[1],"retrieval":[2,52],"has":[3,22],"seen":[4],"its":[5,24],"usage":[6],"in":[7,26,54,145],"a":[8],"variety":[9],"of":[10,40,50,68],"search":[11,17,61],"applications":[12],"like":[13,28,100],"e-commerce,":[14],"social":[15,59],"networking":[16],"etc.":[18,127],"While":[19],"the":[20,38,95,117,133,149,152],"approach":[21],"demonstrated":[23],"efficacy":[25],"tasks":[27],"semantic":[29],"matching":[30,103],"and":[31,63,74,85,122,139],"contextual":[32],"search,":[33],"it":[34],"is":[35],"plagued":[36],"by":[37,71],"problem":[39],"uncontrollable":[41],"relevance.":[42],"In":[43],"this":[44,164],"paper,":[45],"we":[46,131],"conduct":[47],"an":[48],"analysis":[49],"embedding-based":[51],"launched":[53],"early":[55],"2021":[56],"on":[57],"our":[58,157],"network":[60],"engine,":[62],"define":[64],"two":[65],"main":[66],"categories":[67],"failures":[69],"introduced":[70],"it,":[72],"integrity":[73],"junkiness.":[75],"The":[76],"former":[77],"refers":[78],"to":[79,115,163,175],"issues":[80],"such":[81],"as":[82],"hate":[83],"speech":[84],"offensive":[86],"content":[87],"that":[88,156],"can":[89],"severely":[90],"harm":[91],"user":[92,124],"experience,":[93],"while":[94],"latter":[96],"includes":[97],"irrelevant":[98],"results":[99],"fuzzy":[101],"text":[102],"or":[104],"language":[105],"mismatches.":[106],"Efficient":[107],"methods":[108,134,158],"during":[109],"model":[110],"inference":[111],"are":[112,159],"further":[113],"proposed":[114],"resolve":[116],"issue,":[118],"including":[119],"indexing":[120],"treatments":[121],"targeted":[123],"cohort":[125],"treatments,":[126],"Though":[128],"being":[129],"simple,":[130],"show":[132],"have":[135],"good":[136],"offline":[137],"NDCG":[138],"online":[140],"A/B":[141],"tests":[142],"metrics":[143],"gain":[144],"practice.":[146],"We":[147,169],"analyze":[148],"reasons":[150],"for":[151],"improvements,":[153],"pointing":[154],"out":[155],"only":[160],"preliminary":[161],"attempts":[162],"important":[165],"but":[166],"challenging":[167],"problem.":[168],"put":[170],"forward":[171],"potential":[172],"future":[173],"directions":[174],"explore.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2023-04-22T00:00:00"}
