{"id":"https://openalex.org/W2766990604","doi":"https://doi.org/10.1145/3128572.3140452","title":"Beyond Big Data: What Can We Learn from AI Models?","display_name":"Beyond Big Data: What Can We Learn from AI Models?","publication_year":2017,"publication_date":"2017-11-03","ids":{"openalex":"https://openalex.org/W2766990604","doi":"https://doi.org/10.1145/3128572.3140452","mag":"2766990604"},"language":"en","primary_location":{"id":"doi:10.1145/3128572.3140452","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3128572.3140452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th ACM Workshop on Artificial Intelligence and Security","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101545719","display_name":"Aylin Caliskan","orcid":"https://orcid.org/0000-0001-7154-8629"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aylin Caliskan","raw_affiliation_strings":["Princeton University, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5101545719"],"corresponding_institution_ids":["https://openalex.org/I20089843"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.1483994,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9753999710083008,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8217706680297852},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7102510333061218},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5631586313247681},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5087230801582336},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.47562888264656067},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4653870165348053},{"id":"https://openalex.org/keywords/malware","display_name":"Malware","score":0.46048933267593384},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.45685842633247375},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45023900270462036},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.373088538646698},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21711599826812744},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.16302740573883057}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8217706680297852},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7102510333061218},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5631586313247681},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5087230801582336},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.47562888264656067},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4653870165348053},{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.46048933267593384},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.45685842633247375},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45023900270462036},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.373088538646698},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21711599826812744},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.16302740573883057},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3128572.3140452","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3128572.3140452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th ACM Workshop on Artificial Intelligence and Security","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.550000011920929}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W1463623766","https://openalex.org/W2029103396","https://openalex.org/W2033696334","https://openalex.org/W2893425640"],"related_works":["https://openalex.org/W2350278424","https://openalex.org/W2366107444","https://openalex.org/W2071432835","https://openalex.org/W4239401009","https://openalex.org/W2097492617","https://openalex.org/W4234371507","https://openalex.org/W1628824497","https://openalex.org/W4299534542","https://openalex.org/W1995118279","https://openalex.org/W4240624848"],"abstract_inverted_index":{"My":[0],"research":[1],"involves":[2],"the":[3,57,61,80,226,257,262,295,299],"heavy":[4],"use":[5,38,102,300],"of":[6,83,146,184,220,301],"machine":[7,39,49,84,103,279,312],"learning":[8,40,50,85,104,280],"and":[9,22,25,30,111,116,284,298,309],"natural":[10,140],"language":[11,141,144,160],"processing":[12],"in":[13,70,120,208,243,311],"novel":[14],"ways":[15],"to":[16,54,97,108,201,217,234,250,255,261,291],"interpret":[17],"big":[18],"data,":[19],"develop":[20],"privacy":[21,110,194],"security":[23,112],"attacks,":[24],"gain":[26],"insights":[27],"about":[28,294],"humans":[29,151,172],"society":[31],"through":[32],"these":[33],"methods.":[34],"I":[35,46,73,101,114,148,169,288],"do":[36],"not":[37],"only":[41],"as":[42,105,205],"a":[43,106,129,192],"tool":[44,107],"but":[45],"also":[47],"analyze":[48],"models?":[51,281],"internal":[52,282],"representations":[53],"investigate":[55],"how":[56],"artificial":[58],"intelligence":[59],"perceives":[60],"world.":[62],"This":[63,190],"work":[64],"[3]":[65],"has":[66],"been":[67,248],"recently":[68],"featured":[69],"Science":[71],"where":[72],"showed":[74],"that":[75,150,173,198,259,270],"societal":[76],"bias":[77],"exists":[78],"at":[79],"construct":[81],"level":[82],"models,":[86],"namely":[87],"semantic":[88],"space":[89],"word":[90],"embeddings":[91],"which":[92,303],"are":[93,271],"dictionaries":[94],"for":[95,132,196,274,306],"machines":[96],"understand":[98],"language.":[99],"When":[100],"uncover":[109,292],"problems,":[113],"characterize":[115],"quantify":[117],"human":[118,286],"behavior":[119],"language,":[121,302],"including":[122],"programming":[123,143],"languages,":[124],"by":[125],"coming":[126],"up":[127],"with":[128],"linguistic":[130,137,154,285],"fingerprint":[131],"each":[133],"individual.":[134],"By":[135,277],"extracting":[136],"features":[138],"from":[139],"or":[142,180,211,238],"texts":[145],"humans,":[147],"show":[149],"have":[152,174,247,304],"unique":[153],"fingerprints":[155],"since":[156],"they":[157],"all":[158],"learn":[159],"on":[161,166],"an":[162],"individual":[163],"basis.":[164],"Based":[165],"this":[167,244],"finding,":[168],"can":[170,231],"de-anonymize":[171],"written":[175],"certain":[176],"text,":[177],"source":[178],"code,":[179],"even":[181],"executable":[182],"binaries":[183],"compiled":[185],"code":[186,222],"[2,":[187],"4,":[188],"5].":[189],"is":[191],"serious":[193],"threat":[195],"individuals":[197],"would":[199],"like":[200],"remain":[202],"anonymous,":[203],"such":[204],"activists,":[206],"programmers":[207],"oppressed":[209],"regimes,":[210],"malware":[212],"authors.":[213],"Nevertheless,":[214],"being":[215],"able":[216,290],"identify":[218,251],"authors":[219,230],"malicious":[221],"enhances":[223],"security.":[224],"On":[225],"other":[227],"hand,":[228],"identifying":[229],"be":[232],"used":[233,249],"resolve":[235],"copyright":[236],"disputes":[237],"detect":[239],"plagiarism.":[240],"The":[241],"methods":[242],"realm":[245],"[1]":[246],"so":[252],"called":[253],"doppelg\u00e4ngers":[254],"link":[256],"accounts":[258],"belong":[260],"same":[263],"identities":[264],"across":[265],"platforms,":[266],"especially":[267],"underground":[268],"forums":[269],"business":[272],"platforms":[273],"cyber":[275],"criminals.":[276],"analyzing":[278],"representation":[283],"fingerprints,":[287],"am":[289],"facts":[293],"world,":[296],"society,":[297],"implications":[305],"privacy,":[307],"security,":[308],"fairness":[310],"learning.":[313]},"counts_by_year":[{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
