{"id":"https://openalex.org/W4406880294","doi":"https://doi.org/10.48550/arxiv.2501.15282","title":"AutoG: Towards automatic graph construction from tabular data","display_name":"AutoG: Towards automatic graph construction from tabular data","publication_year":2025,"publication_date":"2025-01-25","ids":{"openalex":"https://openalex.org/W4406880294","doi":"https://doi.org/10.48550/arxiv.2501.15282"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2501.15282","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.15282","pdf_url":"https://arxiv.org/pdf/2501.15282","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2501.15282","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073466350","display_name":"Zhikai Chen","orcid":"https://orcid.org/0009-0009-7305-8629"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Zhikai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100405589","display_name":"Han Xie","orcid":"https://orcid.org/0000-0002-2580-4297"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100409868","display_name":"Jian Zhang","orcid":"https://orcid.org/0000-0001-6520-9006"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101675531","display_name":"Xiang Song","orcid":"https://orcid.org/0000-0001-5030-5054"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"song, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040639891","display_name":"Jiliang Tang","orcid":"https://orcid.org/0000-0001-7125-3898"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Jiliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006581225","display_name":"Huzefa Rangwala","orcid":"https://orcid.org/0000-0003-0435-0035"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rangwala, Huzefa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5082384108","display_name":"George Karypis","orcid":"https://orcid.org/0000-0003-2753-1437"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karypis, George","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5073466350"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.642742395401001},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5938943028450012},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3453357517719269},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.20360437035560608}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.642742395401001},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5938943028450012},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3453357517719269},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.20360437035560608}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2501.15282","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.15282","pdf_url":"https://arxiv.org/pdf/2501.15282","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2501.15282","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2501.15282","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2501.15282","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.15282","pdf_url":"https://arxiv.org/pdf/2501.15282","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4406880294.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Recent":[0],"years":[1],"have":[2],"witnessed":[3],"significant":[4],"advancements":[5],"in":[6],"graph":[7,73,103,150,163],"machine":[8],"learning":[9],"(GML),":[10],"with":[11],"its":[12],"applications":[13],"spanning":[14],"numerous":[15],"domains.":[16],"However,":[17],"the":[18,72,100,173],"focus":[19],"of":[20,93,102,144,175],"GML":[21],"has":[22],"predominantly":[23],"been":[24],"on":[25],"developing":[26],"powerful":[27],"models,":[28,54],"often":[29],"overlooking":[30],"a":[31,136,142],"crucial":[32],"initial":[33],"step:":[34],"constructing":[35],"suitable":[36],"graphs":[37,177,189],"from":[38,202],"common":[39],"data":[40],"formats,":[41],"such":[42],"as":[43],"tabular":[44],"data.":[45],"This":[46],"construction":[47,74,104,110,151],"process":[48],"is":[49,124,178],"fundamental":[50],"to":[51,66,86,96,116,126,146,180],"applying":[52],"graph-based":[53],"yet":[55],"it":[56],"remains":[57],"largely":[58],"understudied":[59],"and":[60,76,98,106,148,184],"lacks":[61],"formalization.":[62],"Our":[63,197],"research":[64],"aims":[65],"address":[67],"this":[68,88],"gap":[69],"by":[70,194],"formalizing":[71],"problem":[75],"proposing":[77],"an":[78,156],"effective":[79],"solution.":[80],"We":[81],"identify":[82],"two":[83],"critical":[84,179],"challenges":[85],"achieve":[87],"goal:":[89],"1.":[90],"The":[91,168],"absence":[92],"dedicated":[94],"datasets":[95,145],"formalize":[97,147],"evaluate":[99,149],"effectiveness":[101],"methods,":[105],"2.":[107],"Existing":[108],"automatic":[109],"methods":[111],"can":[112,186,199],"only":[113],"be":[114,200],"applied":[115],"some":[117],"specific":[118],"cases,":[119],"while":[120],"tedious":[121],"human":[122,166,195],"engineering":[123],"required":[125],"generate":[127,187],"high-quality":[128,162,188],"graphs.":[129],"To":[130],"tackle":[131],"these":[132],"challenges,":[133],"we":[134,140,154],"present":[135],"two-fold":[137],"contribution.":[138],"First,":[139],"introduce":[141],"set":[143],"methods.":[152],"Second,":[153],"propose":[155],"LLM-based":[157],"solution,":[158],"AutoG,":[159],"automatically":[160],"generating":[161],"schemas":[164],"without":[165],"intervention.":[167],"experimental":[169],"results":[170],"demonstrate":[171],"that":[172,190],"quality":[174],"constructed":[176],"downstream":[181],"task":[182],"performance,":[183],"AutoG":[185],"rival":[191],"those":[192],"produced":[193],"experts.":[196],"code":[198],"accessible":[201],"https://github.com/amazon-science/Automatic-Table-to-Graph-Generation.":[203]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
