{"id":"https://openalex.org/W3022249559","doi":"https://doi.org/10.1145/3341105.3374015","title":"Investigation of biases in identity linkage DataSets","display_name":"Investigation of biases in identity linkage DataSets","publication_year":2020,"publication_date":"2020-03-29","ids":{"openalex":"https://openalex.org/W3022249559","doi":"https://doi.org/10.1145/3341105.3374015","mag":"3022249559"},"language":"en","primary_location":{"id":"doi:10.1145/3341105.3374015","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3341105.3374015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th Annual ACM Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051458180","display_name":"Rishabh Kaushal","orcid":"https://orcid.org/0000-0002-9200-7802"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]},{"id":"https://openalex.org/I4210143260","display_name":"Indira Gandhi Delhi Technical University for Women","ror":"https://ror.org/057c5p638","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210143260"]},{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rishabh Kaushal","raw_affiliation_strings":["IGDTUW &amp; IIIT, Delhi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IGDTUW &amp; IIIT, Delhi","institution_ids":["https://openalex.org/I4210143260","https://openalex.org/I68891433","https://openalex.org/I119939252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044311646","display_name":"Shubham Gupta","orcid":"https://orcid.org/0000-0002-4710-3686"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]},{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shubham Gupta","raw_affiliation_strings":["IIIT, Delhi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT, Delhi","institution_ids":["https://openalex.org/I119939252","https://openalex.org/I68891433"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077509916","display_name":"Ponnurangam Kumaraguru","orcid":"https://orcid.org/0000-0001-5082-2078"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]},{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ponnurangam Kumaraguru","raw_affiliation_strings":["IIIT, Delhi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT, Delhi","institution_ids":["https://openalex.org/I119939252","https://openalex.org/I68891433"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1354,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.53739592,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1861","last_page":"1868"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9728999733924866,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7428290843963623},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.7250754237174988},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7043694853782654},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.675521969795227},{"id":"https://openalex.org/keywords/social-network","display_name":"Social network (sociolinguistics)","score":0.4947807192802429},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.49374040961265564},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.48880264163017273},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42950063943862915},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39049696922302246},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3812229633331299},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3369288444519043},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.3246743679046631},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.22453683614730835}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7428290843963623},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.7250754237174988},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7043694853782654},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.675521969795227},{"id":"https://openalex.org/C4727928","wikidata":"https://www.wikidata.org/wiki/Q17164759","display_name":"Social network (sociolinguistics)","level":3,"score":0.4947807192802429},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.49374040961265564},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.48880264163017273},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42950063943862915},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39049696922302246},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3812229633331299},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3369288444519043},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.3246743679046631},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.22453683614730835},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3341105.3374015","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3341105.3374015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th Annual ACM Symposium on Applied Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"},{"score":0.4399999976158142,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1588439207","https://openalex.org/W1767117126","https://openalex.org/W1980680715","https://openalex.org/W2047532797","https://openalex.org/W2059141064","https://openalex.org/W2075633077","https://openalex.org/W2077738931","https://openalex.org/W2113105081","https://openalex.org/W2113839561","https://openalex.org/W2166454173","https://openalex.org/W2492995478","https://openalex.org/W2507358938","https://openalex.org/W2563852449","https://openalex.org/W2598689838","https://openalex.org/W2613631298","https://openalex.org/W2613654763","https://openalex.org/W2704480242","https://openalex.org/W2750144484","https://openalex.org/W2751894146","https://openalex.org/W2767922255","https://openalex.org/W2777398797","https://openalex.org/W2910883930","https://openalex.org/W3013374781","https://openalex.org/W3102092462","https://openalex.org/W3122605581","https://openalex.org/W3125143440","https://openalex.org/W3207167514"],"related_works":["https://openalex.org/W4282976635","https://openalex.org/W4295532600","https://openalex.org/W2063823869","https://openalex.org/W2067569035","https://openalex.org/W2090985514","https://openalex.org/W2047973478","https://openalex.org/W2294667518","https://openalex.org/W2032182853","https://openalex.org/W2387801216","https://openalex.org/W2955875337"],"abstract_inverted_index":{"In":[0],"social":[1,19,40,64,171],"networks,":[2],"the":[3,23,44,67,100,114,119,186,209],"problem":[4],"of":[5,14,36,69,121,191,211,218],"identity":[6,101,146],"linkage":[7,102],"is":[8],"to":[9,22,43,207],"find":[10,159,215],"whether":[11],"a":[12,50,74,85,92,205],"pair":[13],"user":[15,37,58,145,152],"identities":[16,38],"on":[17,140,169],"two":[18,151],"networks":[20,41,65,172],"belong":[21],"same":[24,45],"individual":[25],"or":[26],"not.":[27],"Prior":[28],"works":[29],"typically":[30],"first":[31],"collect":[32],"ground":[33],"truth":[34],"datasets":[35],"across":[39],"belonging":[42],"individuals":[46],"and":[47,72,109,130,156,166,189,203,214],"then":[48],"build":[49],"machine":[51],"learning":[52,192],"model":[53],"driven":[54],"by":[55,134,149,194,197],"features":[56],"from":[57],"identities.":[59],"User":[60],"behaviors":[61],"in":[62,80,99,113,118,124,128],"different":[63,170],"drive":[66],"construction":[68],"these":[70,89,138,212],"datasets,":[71],"as":[73],"consequence,":[75],"behavioral":[76,111,182],"biases":[77,112,139,183,213],"get":[78,221],"manifested":[79],"them.":[81],"Our":[82],"work":[83,93],"performs":[84],"detailed":[86],"investigation":[87],"into":[88],"dataset":[90,115],"biases,":[91],"which":[94],"has":[95],"mostly":[96],"remained":[97],"under-explored":[98],"research.":[103],"More":[104],"specifically,":[105],"we":[106,201],"characterize,":[107],"detect,":[108],"quantify":[110,208],"that":[116,160,216],"manifest":[117],"form":[120],"lexical":[122,175],"differences":[123],"user-generated":[125],"content,":[126],"particularly":[127],"usernames":[129,165],"display":[131,167],"names":[132,168],"configured":[133],"users.":[135],"We":[136,158],"study":[137],"more":[141],"than":[142,177],"1":[143],"million":[144],"pairs":[147],"obtained":[148],"leveraging":[150],"behaviors,":[153],"namely":[154],"cross-posting":[155],"self-disclosure.":[157],"users":[161,178],"who":[162,179],"self-disclose":[163],"their":[164],"show":[173],"higher":[174],"similarity":[176],"cross-post.":[180],"These":[181],"lower":[184],"down":[185],"performance":[187],"(precision":[188],"recall)":[190],"models":[193],"5-20%.":[195],"Inspired":[196],"discrimination":[198],"measurement":[199],"metrics,":[200],"propose":[202],"implement":[204],"framework":[206],"extent":[210],"15--20%":[217],"test":[219],"data":[220],"affected.":[222]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
