{“id”:“https://openalex.org/W3174829772“,”doi“:”https://doi.org/10.1145/3451168“,”title“:”异类网页的页面级主内容提取“,”display_name“:”异类网页的页级主内容抽取“,”publication_year“:2021,”publiation_date“:”2021-06-28“,”ids“:{”openalex“:”https://openalex.org/W3174829772“,”doi“:”https://doi.org/10.1145/3451168“,”mag“:”3174829772“},”language“:”en“,”primary_location“:{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1145/3451168“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S41523882“,”display_name“:”ACM事务从数据中发现知识“,”issn_l“:”1556-4681“,”isn“:[”1556-4781“,“1556-472X”],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P431031798“,”“host_organization_name”:“计算机协会”,“host_ordanization_lineage”:[“https://openalex.org/P431031798“],”host_organization_lineage_names“:[”Association for Computing Machinery“],“type”:“journal”},“license”:null,“licence_id”:null,“version”:null,“is_accepted”:false,“is_published”:false},”type“:”article“,”type_crossref“:“jornal-article”,“indexed_in”:[”crossref“],‘open_access”:{“is_oa”:true,“oa_status”:“green”,“oa_url”:“”https://riunet.upv.es/bitstream/10251/181752/3/AlarteSilva%20-%20Page-Level%20Main%20Content%20Extraction%20from%20Heterogeneous%20Webpages.pdf“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”first“,”author“:”https://openalex.org/A5060938121“,”display_name“:”Juli\u00e1n Alarte“,”orcid“:null},”institutions“:[{”id“:”https://openalex.org/I60053951“,”display_name“:”Universitat Polit\u00e8cnica de Val\u00e 8ncia“,”err“:”https://ror.org/01460j859“,”country_code“:”ES“,”type“:“教育”,”世系“:[”https://openalex.org/I60053951“]}],”国家“:[”ES“],”is_corresponding“:false,”raw_author_name“:”Juli\u00e1n Alarte“,”raw _ afiliation_strings“:[“西班牙政治大学\u00e 8cnica de Val\u00e_8ncia”]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5003503212“,”display_name“:”Josep Silva“,”orcid“:”https://orcid.org/0000-0001-5096-0008},“机构”:[{“id”:https://openalex.org/I60053951“,”display_name“:”Universitat Polit\u00e8cnica de Val\u00e 8ncia“,”err“:”https://ror.org/01460j859“,”country_code“:”ES“,”type“:“教育”,”世系“:[”https://openalex.org/I60053951“]}],”countries“:[”ES“],”is_corresponding“:false,”raw_author_name“:”Josep Silva“,”raw _affiliation_strings“:【”Universitat Polit\u00e8cnica de Val\u00e 8ncia,Spain“】}],“countries_distict_count”:1,“institutions_distinact_count”:2,“corresponding_author_ids”:[],“correcponding_institution_ids“:[]”,“apc_list”:null,“apc”c_payd“:null,”has_fulltext“:true,”fulltext_origin“:”pdf“,”cited_by_count“:5,”cited_by_percentile_year“:{”min“:82,”max“:84},”biblio“:{“volume”:“15”,”issue“:”6“,”first_page“:”1“,”last_page“:https://openalex.org/T2016“,”display_name“:”Web数据提取和爬网技术“,”score“:1.0,”subfield“:{”id“:”https://openalex.org/subfields/1710“,”display_name“:”Information Systems“},”field“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T2016“,”“display_name”:“Web数据提取和爬网技术”,“score”:1.0,“subfield”:{“id”:“https://openalex.org/subfields/1710“,”display_name“:”Information Systems“},”field“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T11269“,”display_name“:”文本压缩和索引算法“,”score“:0.968,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T11478“,”display_name“:”以内容为中心的信息传递网络“,”score“:0.9252,”subfield“:{”id“:”https://openalex.org/subfields/1705“,”display_name“:”计算机网络和通信“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/web-data-extraction网站“,”display_name“:”Web数据提取“,”score“:0.65009},{”id“:”https://openalex.org/keywords/page-segration网站“,”display_name“:”页面分段“,”score“:0.54974},{”id“:”https://openalex.org/keywords/web-crawling网站“,”display_name“:”Web爬行“,”score“:0.533066},{”id“:”https://openalex.org/keywords/content-adaptation网站“,”display_name“:”内容改编“,”score“:0.527148},{”id“:”https://openalex.org/keywords/text-indexing网站“,”display_name“:”文本索引“,”score“:0.526026}],”concepts“:[{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.83668137},{”id“:”https://openalex.org/C170858558,“wikidata”:https://www.wikidata.org/wiki/Q1394144“,”display_name“:”自动摘要“,”level“:2,”score“:0.76970696},{”id“:”https://openalex.org/C21959979,“wikidata”:https://www.wikidata.org/wiki/Q36774“,”display_name“:”Web page“,”level“:2,”score“:0.7694118},{”id“:”https://openalex.org/C137922610,“wikidata”:https://www.wikidata.org/wiki/Q2093“,”display_name“:”文档对象模型“,”level“:3,”score“:0.69244844},{”id“:”https://openalex.org/C79581498,“wikidata”:https://www.wikidata.org/wiki/Q1367530“,”display_name“:”Suite“,”level“:2,”score“:0.55768466},{”id“:”https://openalex.org/C23123220,“wikidata”:https://www.wikidata.org/wiki/Q816826“,”display_name“:”信息检索“,”level“:1,”score“:0.542731},{”id“:”https://openalex.org/C195807954,“wikidata”:https://www.wikidata.org/wiki/Q1662562“,”display_name“:”信息提取“,”level“:2,”score“:0.4725109},{”id“:”https://openalex.org/C2776324614,“wikidata”:https://www.wikidata.org/wiki/Q3948731“,”display_name“:”Web内容“,”级别“:3,”分数“:0.4208253},{”id“:”https://openalex.org/C136764020,“wikidata”:https://www.wikidata.org/wiki/Q466“,”display_name“:”万维网“,”level“:1,”score“:0.37015492},{”id“:”https://openalex.org/C95457728,“wikidata”:https://www.wikidata.org/wiki/Q309“,”display_name“:”History“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C166957645,“wikidata”:https://www.wikidata.org/wiki/Q23498“,”display_name“:”Archaeology“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:2,”locations“:[{”is_oa“:false,”landing_page_url“:”https://doi.org/10.1145/3451168“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S41523882“,”display_name“:”ACM事务从数据中发现知识“,”issn_l“:”1556-4681“,”isn“:[”1556-4781“,“1556-472X”],”is_oa“:false,”is_in_doaj“:false,”host_organization“:”https://openalex.org/P431031798“,”“host_organization_name”:“计算机协会”,“host_ordanization_lineage”:[“https://openalex.org/P431031798“],”host_organization_lineage_names“:[”Association for Computing Machinery“],“type”:“journal”},“license”:null,“licence_id”:null,“version”:null,“is_accepted”:false,“is_published”:false},{“is_oa”:true,“landing_page_url”:“http://hdl.handle.net/10251/181752,“pdf_url”:https://riunet.upv.es/bitstream/10251/181752/3/AlarteSilva%20-%20页面级别%20主%20内容%20提取%20来自%20异构%20网页.pdf“,”源“:{”id“:”https://openalex.org/S4306401500“,”display_name“:”RiuNet(巴伦西亚理工大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I60053951“,”“host_organization_name”:“Universitat Polit\u00e8cnica de Val\u00e 8ncia”,“host_organization_lineage”:[“https://openalex.org/I60053951“],”host_organization_lineage_names“:[”Universitat Polit\u00e8cnica de Val\u00e 8ncia“],“type”:“repository”},“license”:“cc-by-nc-nd”,“licence_id”:“https://openalex.org/licenses/cc-by-nc-nd“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true}],”best_oa_location“:{”is_oa“:true,”landing_page_url“:”http://hdl.handle.net/10251/181752,“pdf_url”:https://riunet.upv.es/bitstream/10251/181752/3/AlarteSilva%20-%20Page-Level%20Main%20Content%20Extraction%20from%20Heterogeneous%20Webpages.pdf“,”source“:{”id“:”https://openalex.org/S4306401500“,”display_name“:”RiuNet(巴伦西亚理工大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I60053951“,”“host_organization_name”:“Universitat Polit\u00e8cnica de Val\u00e 8ncia”,“host_organization_lineage”:[“https://openalex.org/I60053951“],”host_organization_lineage_names“:[”Universitat Polit\u00e8cnica de Val\u00e 8ncia“],“type”:“repository”},“license”:“cc-by-nc-nd”,“licence_id”:“https://openalex.org/licenses/cc-by-nc-nd“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”sustainable_development_goals“:[],”grants“:{”funder“:”https://openalex.org/F4320321864“,”“funder_display_name”:“Generalita Valenciana”,“award_id”:“Prometeo/2019/098(DeepTrust)”}],“数据集”:[],“版本”:[],“referenced_works_count”:27,“referrenced_works”:[”https://openalex.org/W1975700640","https://openalex.org/W1989338554","https://openalex.org/W2019441211","https://openalex.org/W2019577381","https://openalex.org/W2040075907","https://openalex.org/W2042970189","https://openalex.org/W2048192672","https://openalex.org/W2049781914","https://openalex.org/W2063147917","https://openalex.org/W2072489225","https://openalex.org/W2076910790","https://openalex.org/W2093907956","https://openalex.org/W2096478255","https://openalex.org/W2097705449","https://openalex.org/W2117209866","https://openalex.org/W2117694587","https://openalex.org/W2120101509","https://openalex.org/W2130186082","https://openalex.org/W2140208587","https://openalex.org/W2147220393","https://openalex.org/W2151588647","https://openalex.org/W2158051716","https://openalex.org/W2164565864","https://openalex.org/W2201534957","https://openalex.org/W2568933682","https://openalex.org/W2898766670","https://openalex.org/W2964079897“],”related_works“:[”https://openalex.org/W36911888网址","https://openalex.org/W3144508074","https://openalex.org/W2951920527","https://openalex.org/W2897171874","https://openalex.org/W2373402338","https://openalex.org/W2278505189","https://openalex.org/W2031790754","https://openalex.org/W2012575882","https://openalex.org/W2003578783","https://openalex.org/W1987716395“],”ngrams_url“:”https://api.openalex.org/works/W3174829772/ngrams网站“,”“abstract_inverted_index”:{“The”:[0],“main”:[1,31,70,96141],“content”:[2,32,71,84,97178],“of”:[3,53,68114166],“a”:[4,51,92123144164],“webpage”:[5137],“is”:[6,40,72122147],“经常”:[7],“包围”:[8],“by”:[9],“other”:[10,3712176],“样板”:[11],“elements”:[12],“相关”:[13],“到”:[14,86101133138151],““:[15,30,34,64,69140161],”模板“,:[16],”此类“:[17,55,77116],”作为“:[18,56,78117150],”菜单“:[19],”广告“:[20],”版权“:[21],”通知“:[22],”和“:[23,27,36,46,60,66,83119],”评论。“:[24],”对于“:[25],”爬虫“:[26],”索引器,“:[28],”隔离“:[29],”来自“:[33],”模板“:[35],”噪音“:[38,48],”信息“:[39,49],”an“:[41],”基本“:[42],”任务“:[43],”因为“:[44],”处理“:[45],”存储“:[47],”生产“:[50],”废物“:[52],”资源“:[54],“带宽”:[57],“存储”:[58],“空间”:[59],“计算”:[61],“时间”。“:[62],”除此之外,“:[63],”检测“:[65],”提取“:[67179],”有用“:[73],”in“:[74],”different“:[75],”区域“:[76],”数据“:[79],”挖掘“:[80],”web“:[81],”摘要“:[82],”自适应“:[85],”低“:[87],”分辨率。“:[88],”此“:[89],”工作“:[90],”介绍“:[91],”新“:[93],”技术“:[94105162],”用于“:[95],”提取。“:[98],”In“:[99],”contrast“:[100],”most“:[102],”technologies“,:[103],”this“:[104],”not“:[106],”only“:[107131],”extracts“:[108],”text“,”:[109],“but”:[110],“also”:[111],“types”:[113],“content,”:[115],“images,”:%118],“animations。“:[120],”It“:[121],”Document“:[124],”Object“:%125],”Model-based“:[126],”pagelevel“:/127],”technology“:[128],”thus“:[129],”It“:[130146],”needs“:[132],”load“:+134],”one“:/135],”single“:136],”extract“:%139],”content“。“:[142],”As“:[143],”后果“:[145],”高效“:[148],”足够“:[149],”be“:[152],”used“:[153],”online“:[154],”(in“:[155],”实时)。“:[156],“我们”:[157],“有”:[158],“经验”:[159],“评估”:[160],“使用”:[163],“套件”:[165],“真实”:[167],“异质”:[168],“基准”:[169],“生产”:[170],“非常”:[171],“良好”:[172],“结果”:[173],“比较”:[174],“与”:[175],“众所周知”:[177],“技术。“:[180]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W3174829772“,”counts_by_year“:[{”年份“:2023,”cited_by_count“:1},{”年“:2022,”ciped_by_cunt“:3},”{“年份”:2021,“cited_by_count”:1}],”更新日期“:”2024-06-13T17:53:52.375965“,”创建日期“:“2021-07-05”}