{"id":"https://openalex.org/W4411278031","doi":"https://doi.org/10.1145/3699682.3728329","title":"Mitigating Risks in Marketplace Semantic Search: A Dataset for Harmful and Sensitive Query Alignment","display_name":"Mitigating Risks in Marketplace Semantic Search: A Dataset for Harmful and Sensitive Query Alignment","publication_year":2025,"publication_date":"2025-06-13","ids":{"openalex":"https://openalex.org/W4411278031","doi":"https://doi.org/10.1145/3699682.3728329"},"language":"en","primary_location":{"id":"doi:10.1145/3699682.3728329","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3699682.3728329","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3699682.3728329","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM Conference on User Modeling, Adaptation and Personalization","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3699682.3728329","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5118140964","display_name":"Filip Spacek","orcid":"https://orcid.org/0009-0002-4407-2260"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Filip Spacek","raw_affiliation_strings":["Czech Technical University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0009-0002-4407-2260","affiliations":[{"raw_affiliation_string":"Czech Technical University, Prague, Czech Republic","institution_ids":["https://openalex.org/I44504214"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022460264","display_name":"Vojt\u011bch Van\u010dura","orcid":"https://orcid.org/0000-0003-2638-9969"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Vojtech Vancura","raw_affiliation_strings":["Department of Applied Mathematics, Czech Technical University, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0003-2638-9969","affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics, Czech Technical University, Prague, Czech Republic","institution_ids":["https://openalex.org/I44504214"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035026045","display_name":"Pavel Kord\u00edk","orcid":"https://orcid.org/0000-0003-1433-0089"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Pavel Kordik","raw_affiliation_strings":["Czech Technical University, Prague, Prague, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0003-1433-0089","affiliations":[{"raw_affiliation_string":"Czech Technical University, Prague, Prague, Czech Republic","institution_ids":["https://openalex.org/I44504214"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5118140964"],"corresponding_institution_ids":["https://openalex.org/I44504214"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15101162,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"329","last_page":"334"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7120683193206787},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5594258904457092},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.49489444494247437},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.4627547264099121},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.2912804186344147}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7120683193206787},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5594258904457092},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.49489444494247437},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.4627547264099121},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.2912804186344147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3699682.3728329","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3699682.3728329","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3699682.3728329","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM Conference on User Modeling, Adaptation and Personalization","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3699682.3728329","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3699682.3728329","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3699682.3728329","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM Conference on User Modeling, Adaptation and Personalization","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1351995356","display_name":"Fostering Sustainable, Balanced, Equitable, Place-based and Inclusive Development of Rural-Urban Communities' Using Specific Spatial Enhanced Attractivenes Mapping ToolBox","funder_award_id":"101136910","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G519331849","display_name":null,"funder_award_id":"101136910","funder_id":"https://openalex.org/F4320338446","funder_display_name":"HORIZON EUROPE Food, Bioeconomy, Natural Resources, Agriculture and Environment"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320338446","display_name":"HORIZON EUROPE Food, Bioeconomy, Natural Resources, Agriculture and Environment","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411278031.pdf","grobid_xml":"https://content.openalex.org/works/W4411278031.grobid-xml"},"referenced_works_count":10,"referenced_works":["https://openalex.org/W1527758775","https://openalex.org/W2127744755","https://openalex.org/W2158893217","https://openalex.org/W2970641574","https://openalex.org/W4376056120","https://openalex.org/W4380319827","https://openalex.org/W4385574179","https://openalex.org/W4386580390","https://openalex.org/W4391473457","https://openalex.org/W6949877883"],"related_works":["https://openalex.org/W2096359267","https://openalex.org/W1521725692","https://openalex.org/W3008917487","https://openalex.org/W3001245047","https://openalex.org/W1873153460","https://openalex.org/W2901901036","https://openalex.org/W3197639690","https://openalex.org/W4256058599","https://openalex.org/W2044231962","https://openalex.org/W2961567132"],"abstract_inverted_index":{"Semantic":[0],"search":[1,46,155,232],"engines":[2],"have":[3],"transformed":[4],"user":[5,40,60],"interaction":[6],"with":[7,25,75,112],"online":[8],"marketplaces,":[9],"creating":[10],"a":[11,54,71,76,84,103,109,124,159],"need":[12],"for":[13,58,131,161],"effective":[14],"methods":[15,140],"to":[16,38,87,190,216,223],"moderate":[17],"harmful":[18],"and":[19,32,65,91,121,147,178,184,194,204,222],"sensitive":[20],"content.Existing":[21],"approaches":[22],"often":[23],"struggle":[24],"ambiguous":[26],"query":[27,133],"intent,":[28],"content":[29,164,205,220],"classification":[30,99,116],"challenges,":[31,50],"noisy":[33],"data,":[34],"making":[35],"it":[36],"difficult":[37],"ensure":[39,88],"safety":[41],"while":[42],"maintaining":[43],"relevance":[44],"in":[45,141,152,167,219,230],"results.To":[47],"address":[48],"these":[49,139],"we":[51,96],"introduce":[52],"SHIELD,":[53],"synthetic":[55],"dataset":[56],"designed":[57],"classifying":[59],"queries":[61],"into":[62,172],"harmful,":[63],"sensitive,":[64],"normal":[66],"categories.SHIELD":[67],"is":[68],"generated":[69],"using":[70,83],"large":[72],"language":[73,203],"model":[74,86,127],"structured":[77],"taxonomy,":[78],"followed":[79],"by":[80,117],"automated":[81],"filtering":[82],"reward":[85],"data":[89],"quality":[90],"relevance.To":[92],"demonstrate":[93],"SHIELD's":[94],"utility,":[95],"evaluate":[97],"three":[98],"approaches:":[100],"(1)":[101],"BM25,":[102],"computationally":[104],"efficient":[105],"retrieval-based":[106],"method;":[107],"(2)":[108],"sentence":[110],"transformer":[111,126],"FAISS,":[113],"which":[114],"improves":[115],"leveraging":[118],"semantic":[119,154,168,231],"embeddings;":[120],"(3)":[122],"MoralBERT,":[123],"fine-tuned":[125],"trained":[128],"on":[129],"SHIELD":[130,180],"direct":[132],"classification.We":[134],"discuss":[135],"the":[136,173,225],"trade-offs":[137,174],"among":[138],"terms":[142],"of":[143,202,227],"accuracy,":[144,177],"resource":[145],"requirements,":[146],"explainability,":[148],"highlighting":[149],"their":[150],"applicability":[151],"real-world":[153,195],"systems.This":[156],"work":[157],"provides":[158],"foundation":[160],"developing":[162],"AI-driven":[163],"moderation":[165,221],"systems":[166],"search,":[169],"offering":[170],"insights":[171],"between":[175],"efficiency,":[176],"explainability.The":[179],"dataset,":[181],"pre-trained":[182],"model,":[183],"generation":[185],"details":[186],"are":[187,213],"publicly":[188],"available":[189],"support":[191],"future":[192],"research":[193],"deployment:":[196],"https://github.com/flpspacek/SHIELD.Warning:":[197],"This":[198],"paper":[199],"contains":[200],"examples":[201,212],"that":[206],"some":[207],"readers":[208],"may":[209],"find":[210],"offensive.These":[211],"included":[214],"solely":[215],"illustrate":[217],"challenges":[218],"highlight":[224],"importance":[226],"ethical":[228],"considerations":[229],"systems.":[233]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
