{"id":"https://openalex.org/W4226278384","doi":"https://doi.org/10.1145/3486622.3493928","title":"A Framework for Duplicate Detection from Online Job Postings","display_name":"A Framework for Duplicate Detection from Online Job Postings","publication_year":2021,"publication_date":"2021-12-14","ids":{"openalex":"https://openalex.org/W4226278384","doi":"https://doi.org/10.1145/3486622.3493928"},"language":"en","primary_location":{"id":"doi:10.1145/3486622.3493928","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3486622.3493928","pdf_url":null,"source":{"id":"https://openalex.org/S4363608074","display_name":"IEEE/WIC/ACM International Conference on Web Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/WIC/ACM International Conference on Web Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032616837","display_name":"Yanchang Zhao","orcid":"https://orcid.org/0000-0002-0209-3971"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Yanchang Zhao","raw_affiliation_strings":["Data61, CSIRO, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, CSIRO, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062793472","display_name":"Haohui Chen","orcid":"https://orcid.org/0000-0001-8976-3634"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Haohui Chen","raw_affiliation_strings":["Data61, CSIRO, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, CSIRO, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033932131","display_name":"Claire Mason","orcid":"https://orcid.org/0000-0002-4412-5142"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Claire M. Mason","raw_affiliation_strings":["Data61, CSIRO, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, CSIRO, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5032616837"],"corresponding_institution_ids":["https://openalex.org/I1292875679","https://openalex.org/I42894916"],"apc_list":null,"apc_paid":null,"fwci":1.3823,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.85202842,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6757911443710327},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.6527884602546692},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.6287375688552856},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.6111435890197754},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5566855669021606},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4912356734275818},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.45889022946357727},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.41898080706596375},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4087929427623749},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3494280278682709},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.22101670503616333},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.12901121377944946},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.08575955033302307}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6757911443710327},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.6527884602546692},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.6287375688552856},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.6111435890197754},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5566855669021606},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4912356734275818},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.45889022946357727},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41898080706596375},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4087929427623749},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3494280278682709},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22101670503616333},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.12901121377944946},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.08575955033302307},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3486622.3493928","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3486622.3493928","pdf_url":null,"source":{"id":"https://openalex.org/S4363608074","display_name":"IEEE/WIC/ACM International Conference on Web Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/WIC/ACM International Conference on Web Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2012833704","https://openalex.org/W2085922539","https://openalex.org/W2145349611","https://openalex.org/W2152311353","https://openalex.org/W2152575748","https://openalex.org/W2613303232","https://openalex.org/W2894899500"],"related_works":["https://openalex.org/W3109760095","https://openalex.org/W2441615757","https://openalex.org/W3083038004","https://openalex.org/W4280513537","https://openalex.org/W2888077452","https://openalex.org/W167782738","https://openalex.org/W2790170883","https://openalex.org/W2997035466","https://openalex.org/W3186016112","https://openalex.org/W4285258521"],"abstract_inverted_index":{"Online":[0],"job":[1,9,29,36,41,53,66,91,103,155],"boards":[2,42,67],"have":[3,12],"greatly":[4],"improved":[5],"the":[6,49,63,69,83,107,134,169,201],"efficiency":[7],"of":[8,27,48,65,71,86,133],"searching":[10],"and":[11,40,68,110,121,130,137,158,178,189,194],"also":[13],"provided":[14],"valuable":[15],"data":[16],"for":[17,98],"labour":[18,72],"market":[19,50,73],"research.":[20],"However,":[21],"there":[22],"are":[23,147,159,173],"a":[24,96,127,142,150,154],"high":[25],"proportion":[26],"duplicate":[28,60,87,99],"postings":[30,54,61,104],"in":[31,204],"most":[32],"(if":[33],"not":[34],"all)":[35],"boards,":[37],"because":[38],"recruiters":[39],"seek":[43],"to":[44],"improve":[45],"their":[46,139],"coverage":[47],"by":[51,184],"integrating":[52],"from":[55,76,89,101,153],"many":[56],"different":[57,117],"sources.":[58],"These":[59],"undermine":[62],"usability":[64],"quality":[70],"analytics":[74],"derived":[75],"them.":[77],"In":[78],"this":[79],"paper,":[80],"we":[81,94],"tackle":[82],"challenging":[84],"problem":[85],"detection":[88,100],"online":[90,102],"postings.":[92],"Specifically,":[93],"design":[95],"framework":[97],"and,":[105],"under":[106],"framework,":[108],"implement":[109],"test":[111],"24":[112,135],"methods":[113,136,146,172,199],"built":[114],"with":[115,141,149,161,175,180,186,191],"four":[116,198],"tokenisers,":[118],"three":[119],"vectorisers":[120],"six":[122,162],"similarity":[123],"measures.":[124],"We":[125],"conduct":[126],"comparative":[128],"study":[129],"experimental":[131],"evaluation":[132],"compare":[138],"performance":[140,163],"baseline":[143,202],"approach.":[144],"All":[145],"tested":[148],"real-world":[151],"dataset":[152],"boarding":[156],"platform":[157],"evaluated":[160],"metrics.":[164],"The":[165],"experiment":[166],"reveals":[167],"that":[168,195],"top":[170],"two":[171],"Overlap":[174,179],"skip-gram":[176,192],"(OS)":[177],"n-gram":[181,187],"(OG),":[182],"followed":[183],"TFIDF-cosine":[185,190],"(TCG)":[188],"(TCS),":[193],"all":[196],"above":[197],"outperform":[200],"approach":[203],"detecting":[205],"duplicates.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
