{"id":"https://openalex.org/W6949510369","doi":"https://doi.org/10.5281/zenodo.14943110","title":"Exploring Measures of Distinctiveness: An Evaluation Using Synthetic Texts","display_name":"Exploring Measures of Distinctiveness: An Evaluation Using Synthetic Texts","publication_year":2025,"publication_date":"2025-02-26","ids":{"openalex":"https://openalex.org/W6949510369","doi":"https://doi.org/10.5281/zenodo.14943110"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.14943110","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14943110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.14943110","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Dudar, Julia","orcid":"https://orcid.org/0000-0001-5545-9562"},"institutions":[{"id":"https://openalex.org/I89864525","display_name":"Universit\u00e4t Trier","ror":"https://ror.org/02778hg05","country_code":"DE","type":"education","lineage":["https://openalex.org/I89864525"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Dudar, Julia","raw_affiliation_strings":["Universit\u00e4t Trier, Deutschland"],"raw_orcid":"https://orcid.org/0000-0001-5545-9562","affiliations":[{"raw_affiliation_string":"Universit\u00e4t Trier, Deutschland","institution_ids":["https://openalex.org/I89864525"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sch\u00f6ch, Christof","orcid":"https://orcid.org/0000-0002-4557-2753"},"institutions":[{"id":"https://openalex.org/I89864525","display_name":"Universit\u00e4t Trier","ror":"https://ror.org/02778hg05","country_code":"DE","type":"education","lineage":["https://openalex.org/I89864525"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sch\u00f6ch, Christof","raw_affiliation_strings":["Universit\u00e4t Trier, Deutschland"],"raw_orcid":"https://orcid.org/0000-0002-4557-2753","affiliations":[{"raw_affiliation_string":"Universit\u00e4t Trier, Deutschland","institution_ids":["https://openalex.org/I89864525"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I89864525"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.52464165,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12864","display_name":"Psychoanalysis and Social Critique","score":0.11720000207424164,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12864","display_name":"Psychoanalysis and Social Critique","score":0.11720000207424164,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12753","display_name":"Psychology, Coaching, and Therapy","score":0.07639999687671661,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T14322","display_name":"Art, Aesthetics, and Perception","score":0.05999999865889549,"subfield":{"id":"https://openalex.org/subfields/1213","display_name":"Visual Arts and Performing Arts"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/optimal-distinctiveness-theory","display_name":"Optimal distinctiveness theory","score":0.7236999869346619},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.5882999897003174},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5479999780654907},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5149000287055969},{"id":"https://openalex.org/keywords/dispersion","display_name":"Dispersion (optics)","score":0.4496000111103058},{"id":"https://openalex.org/keywords/word-lists-by-frequency","display_name":"Word lists by frequency","score":0.4092000126838684}],"concepts":[{"id":"https://openalex.org/C47385372","wikidata":"https://www.wikidata.org/wiki/Q7098943","display_name":"Optimal distinctiveness theory","level":2,"score":0.7236999869346619},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6029999852180481},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.5882999897003174},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.580299973487854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5705999732017517},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5479999780654907},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5149000287055969},{"id":"https://openalex.org/C177562468","wikidata":"https://www.wikidata.org/wiki/Q182893","display_name":"Dispersion (optics)","level":2,"score":0.4496000111103058},{"id":"https://openalex.org/C175293574","wikidata":"https://www.wikidata.org/wiki/Q697133","display_name":"Word lists by frequency","level":3,"score":0.4092000126838684},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.4049000144004822},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3950999975204468},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28929999470710754},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.14943110","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14943110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.14943110","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14943110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.5139282941818237,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Measures":[0],"of":[1,9,39,87,142,148],"distinctiveness":[2],"are":[3,61],"important":[4],"tools":[5],"for":[6,36],"comparing":[7],"groups":[8],"texts":[10],"to":[11,22,50,68,73,98,138],"identify":[12],"each":[13,70],"group's":[14],"characteristic":[15],"features.":[16],"Evaluating":[17],"these":[18,88],"measures":[19,38,147],"is":[20,95],"essential":[21],"ensure":[23],"their":[24],"reliability":[25],"and":[26,59,77,129],"predictability.":[27],"In":[28,132],"our":[29,80,133],"research,":[30],"we":[31,52,82,91,106,136],"applied":[32],"a":[33,44,112,115],"new":[34,144],"method":[35,42],"evaluating":[37],"distinctiveness.":[40,149],"Our":[41],"uses":[43],"synthetically":[45],"generated":[46],"homogenous":[47],"text":[48],"corpus":[49],"which":[51],"insert":[53],"an":[54],"artificial":[55],"word":[56,113],"whose":[57],"frequency":[58,76,124],"dispersion":[60,99,117],"precisely":[62],"manipulated.":[63],"This":[64],"approach":[65],"allows":[66],"us":[67],"determine":[69],"measure's":[71],"sensitivity":[72],"variations":[74,100],"in":[75,125],"dispersion.":[78],"Through":[79],"evaluation,":[81],"uncovered":[83],"previously":[84],"unknown":[85],"characteristics":[86],"measures.":[89,104],"Specifically,":[90],"discovered":[92],"that":[93,108],"TF-IDF":[94],"more":[96],"sensitive":[97],"than":[101],"other":[102],"dispersion-based":[103],"Moreover,":[105],"found":[107],"Eta":[109],"cannot":[110],"detect":[111],"with":[114],"clear":[116],"contrast":[118],"when":[119],"it":[120],"has":[121],"the":[122,127],"same":[123],"both":[126],"target":[128],"comparison":[130],"groups.":[131],"next":[134],"steps,":[135],"aim":[137],"explore":[139],"practical":[140],"applications":[141],"this":[143],"knowledge":[145],"about":[146]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
