{"id":"https://openalex.org/W4409671330","doi":"https://doi.org/10.1145/3696410.3714701","title":"MixedSAND: Semantic Annotation of Mixed-unit Numeric Data","display_name":"MixedSAND: Semantic Annotation of Mixed-unit Numeric Data","publication_year":2025,"publication_date":"2025-04-22","ids":{"openalex":"https://openalex.org/W4409671330","doi":"https://doi.org/10.1145/3696410.3714701"},"language":"en","primary_location":{"id":"doi:10.1145/3696410.3714701","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714701","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714701","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714701","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093105994","display_name":"Amir Behrad Khorram Nazari","orcid":"https://orcid.org/0009-0008-3287-4964"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Amir Behrad Khorram Nazari","raw_affiliation_strings":["University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071837282","display_name":"Davood Rafiei","orcid":"https://orcid.org/0000-0003-2403-0266"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Davood Rafiei","raw_affiliation_strings":["University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014272131","display_name":"M\u00e1rio A. Nascimento","orcid":"https://orcid.org/0000-0002-7609-1805"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mario A. Nascimento","raw_affiliation_strings":["Northeastern University, Vancouver, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Vancouver, BC, Canada","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5093105994"],"corresponding_institution_ids":["https://openalex.org/I154425047"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09127384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"178","last_page":"187"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13067","display_name":"Geological Modeling and Analysis","score":0.9605000019073486,"subfield":{"id":"https://openalex.org/subfields/1906","display_name":"Geochemistry and Petrology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.9523000121116638,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8240196704864502},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5002062320709229},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44579246640205383},{"id":"https://openalex.org/keywords/unit","display_name":"Unit (ring theory)","score":0.4376699924468994},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4280881881713867},{"id":"https://openalex.org/keywords/semantic-annotation","display_name":"Semantic annotation","score":0.41660821437835693},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3292280435562134},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08212155103683472}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8240196704864502},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5002062320709229},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44579246640205383},{"id":"https://openalex.org/C122637931","wikidata":"https://www.wikidata.org/wiki/Q118084","display_name":"Unit (ring theory)","level":2,"score":0.4376699924468994},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4280881881713867},{"id":"https://openalex.org/C2985727698","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Semantic annotation","level":3,"score":0.41660821437835693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3292280435562134},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08212155103683472},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3696410.3714701","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714701","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714701","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3696410.3714701","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714701","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714701","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2165548363","display_name":null,"funder_award_id":"Canada","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409671330.pdf","grobid_xml":"https://content.openalex.org/works/W4409671330.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W1576534100","https://openalex.org/W1981276685","https://openalex.org/W2004305018","https://openalex.org/W2342096063","https://openalex.org/W2529049456","https://openalex.org/W2739113951","https://openalex.org/W2752618741","https://openalex.org/W2798664493","https://openalex.org/W2889003264","https://openalex.org/W2896767501","https://openalex.org/W2898629972","https://openalex.org/W2898796029","https://openalex.org/W2899297497","https://openalex.org/W2904530076","https://openalex.org/W2992252352","https://openalex.org/W4210451781","https://openalex.org/W4233014035","https://openalex.org/W4380433117","https://openalex.org/W4387846857","https://openalex.org/W6600075759","https://openalex.org/W6987030581"],"related_works":["https://openalex.org/W2369337446","https://openalex.org/W2126412646","https://openalex.org/W4385569300","https://openalex.org/W3032255889","https://openalex.org/W2574165153","https://openalex.org/W2251095918","https://openalex.org/W2206855020","https://openalex.org/W2105339902","https://openalex.org/W2123888462","https://openalex.org/W1978635732"],"abstract_inverted_index":{"Quantitative":[0],"information":[1],"about":[2],"entities":[3],"constitutes":[4],"a":[5,55,88,115,120],"significant":[6,26],"portion":[7],"of":[8,36,123,131,149],"tabular":[9],"data":[10,15,44],"in":[11,40,65,114,147,152],"open":[12],"sources":[13,50],"and":[14,22,30,51,69,95,106,128,145,157],"lakes.Such":[16],"tables":[17],"often":[18],"lack":[19],"consistent":[20],"labeling":[21],"proper":[23],"schema,":[24],"posing":[25],"challenges":[27],"for":[28,67,71,81,90],"querying":[29],"integration.This":[31],"paper":[32],"studies":[33],"the":[34,79,110],"problem":[35],"numerical":[37],"column":[38],"annotation":[39,93,132],"scenarios":[41],"where":[42],"quantitative":[43],"may":[45,60],"be":[46],"gathered":[47],"from":[48],"different":[49],"unit":[52,76],"consistency":[53],"is":[54],"concern.For":[56],"instance,":[57],"weight":[58],"measurements":[59],"vary":[61],"between":[62],"entities,":[63],"expressed":[64],"kilograms":[66],"some":[68],"pounds":[70],"others,":[72],"with":[73,125,160],"no":[74],"accompanying":[75],"information.We":[77],"investigate":[78],"conditions":[80],"effectively":[82],"annotating":[83,158],"mixed-unit":[84],"numeric":[85],"data,":[86],"introduce":[87],"benchmark":[89],"such":[91,142],"an":[92,97],"task,":[94],"propose":[96],"algorithm":[98],"that":[99,135],"reliably":[100],"detects":[101],"semantic":[102,162],"types":[103,112],"(e.g.,":[104],"height)":[105],"links":[107],"them":[108,159],"to":[109],"corresponding":[111],"present":[113],"knowledge":[116],"graph.Our":[117],"evaluation":[118],"on":[119],"diverse":[121],"set":[122],"columns":[124],"mixed":[126,155],"units":[127,156],"varying":[129],"levels":[130],"difficulty":[133],"shows":[134],"our":[136],"method":[137],"significantly":[138],"outperforms":[139],"strong":[140],"baselines":[141],"as":[143],"GPT-4o-mini":[144],"SAND":[146],"terms":[148],"accuracy,":[150],"excelling":[151],"both":[153],"detecting":[154],"appropriate":[161],"labels.":[163]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
