{“id”:“https://openalex.org/W3199243620“,”doi“:”https://doi.org/10.18653/v1/2021.emnlp-main.833“,”title“:“IndoBERTweet:印尼推特的预训练语言模型,具有有效的领域特定词汇初始化”,“display_name”:“IndoBERTweet:印度尼西亚推特的预先训练语言模型(具有有效的域特定词汇初始化)”,“publication_year”:2021,“publitation_date”:“2021-01-01”,“ids”:{“openalex”:“https://openalex.org/W3199243620“,”doi“:”https://doi.org/10.18653/v1/2021.emnlp-main.833“,”mag“:”3199243620“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://doi.org/10.18653/v1/2021.emnlp-main.833,“pdf_url”:https://aclantology.org/2021.emnlp-main.833.pdf,“源”:{“id”:https://openalex.org/S4363608991“,”display_name“:”2021年自然语言处理实证方法会议记录“,”issn_l“:null,”issn“:nul,”is_oa“:false,”is_ in_doaj“:false,”host_organization“:nuld,”hose_organization_name“:null,“host_orgganization_lineage”:[],“hosd_organisation_lineage_names”:[[],”type“:”Conference“},”license“:”cc-by“,”licence_id“:”https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”type“:”article“,”type_crossref“:“procesdings-article”,”indexed_in“:[”crossref“],”open_access“:{”is_oa“:true,”oa_status“:”hybrid“,”oa_url“:”https://aclantology.org/2021.emnlp-main.833.pdf“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5065822589“,”display_name“:”Fajri Koto“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:true,”raw_author_name“:”Fajri-Goto“,”raw _affiliation_string“:[”墨尔本大学计算与信息系统学院“]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5032767467“,”display_name“:”Jey Han Lau“,”orcid“:”https://orcid.org/0000-0002-1647-4628“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Jey Han Lau“,”raw关联字符串“:[”墨尔本大学计算与信息系统学院“]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5060332228“,”display_name“:”蒂莫西·鲍德温“,”兽人“:”https://orcid.org/0000-0003-4525-6950“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Timothy Baldwin“,”raw _ afiliation_strings“:[”墨尔本大学计算与信息系统学院“]}],”countries _ distinct_count“:0,”institutions _ disting_count”:0,“corresponding_author_ids”:[”https://openalex.org/A5065822589“],”corresponding_institution_ids“:[],”apc_list“:null,”apc _ paid“:null,”has _ fulltext“:true,”fulltext_origin“:”pdf“,”cited_by_count“:20,”citecd_by_percentile_year“:{”min“:95,”max“:96},”biblio“:{volume“:null,”issue“:nul,”first_page“:null},“last_page”:null{,”is_retracted“:false,”is_paratext“:false,”primarit ry_topic“:{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9999,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9999,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10181“,”display_name“:”统计机器翻译和自然语言处理“,”score“:0.9997,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T13629“,”display_name“:”自动文本简化和可读性评估“,”score“:0.9708,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/topic-modeling网站“,”display_name“:”Topic Modeling“,”score“:0.507338}],”concepts“:[{”id“:”https://openalex.org/C114466953,“wikidata”:https://www.wikidata.org/wiki/Q6034165“,”display_name“:”Initialization“,”level“:2,”score“:0.89928806},{”id“:”https://openalex.org/C2777601683,“wikidata”:https://www.wikidata.org/wiki/Q6499736“,”display_name“:”词汇“,”级别“:2,”分数“:0.8375272},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.8317306},{”id“:”https://openalex.org/C277920738网址,“wikidata”:https://www.wikidata.org/wiki/Q9240“,”display_name“:”印尼语“,”level“:2,”score“:0.7661821},{”id“:”https://openalex.org/C185798385,“wikidata”:https://www.wikidata.org/wiki/Q1161707“,”display_name“:”基准(测量)“,”level“:2,”score“:0.6626632},{”id“:”https://openalex.org/C192209626,“wikidata”:https://www.wikidata.org/wiki/Q190909“,”display_name“:”Focus(optics)“,”level“:2,”score“:0.6234416},{”id“:”https://openalex.org/C204321447,“wikidata”:https://www.wikidata.org/wiki/Q30642“,”display_name“:”自然语言处理“,”level“:1,”score“:0.6154107},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.6041761},{”id“:”https://openalex.org/C2776434776,“wikidata”:https://www.wikidata.org/wiki/Q19246213“,”display_name“:”域自适应“,”level“:3,”score“:0.58469665},{”id“:”https://openalex.org/C137293760,“wikidata”:https://www.wikidata.org/wiki/Q3621696“,”display_name“:”语言模型“,”level“:2,”score“:0.57490224},{”id“:”https://openalex.org/C41608201,“wikidata”:https://www.wikidata.org/wiki/Q980509“,”display_name“:”Embedding“,”level“:2,”score“:0.53202504},{”id“:”https://openalex.org/C2779227376,“wikidata”:https://www.wikidata.org/wiki/Q6505497“,”display_name“:”Layer(electronics)“,”level“:2,”score“:0.5210937},{”id“:”https://openalex.org/C36503486,“wikidata”:https://www.wikidata.org/wiki/Q11235244“,”display_name“:”域(数学分析)“,”level“:2,”score“:0.4989004},{”id“:”https://openalex.org/C90805587,“wikidata”:https://www.wikidata.org/wiki/Q10944557“,”display_name“:”Word(群论)“,”level“:2,”score“:0.4926203},{”id“:”https://openalex.org/C139807058,“wikidata”:https://www.wikidata.org/wiki/Q352374“,”display_name“:”适应(眼睛)“,”级别“:2,”分数“:0.48663887},{”id“:”https://openalex.org/C28490314,“wikidata”:https://www.wikidata.org/wiki/Q189436“,”display_name“:”语音识别“,”level“:1,”score“:0.48630986},{”id“:”https://openalex.org/C41895202,“wikidata”:https://www.wikidata.org/wiki/Q8162“,”display_name“:”语言学“,”level“:1,”score“:0.18133172},{”id“:”https://openalex.org/C15744967,“wikidata”:https://www.wikidata.org/wiki/Q9418“,”display_name“:”心理学“,”等级“:0,”分数“:0.06586513},{”id“:”https://openalex.org/C33923547,“wikidata”:https://www.wikidata.org/wiki/Q395“,”display_name“:”数学“,”等级“:0,”分数“:0.05393076},{”id“:”https://openalex.org/C134306372,“wikidata”:https://www.wikidata.org/wiki/Q7754“,”display_name“:”数学分析“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”哲学“,”等级“:0,”分数“:0.0},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”物理“,”级别“:0,”分数“:0.0},{”id“:”https://openalex.org/C185592680,“wikidata”:https://www.wikidata.org/wiki/Q2329“,”display_name“:”Chemistry“,”level“:0,”score“:0.0},{”id“:”https://openalex.org/C13280743,“wikidata”:https://www.wikidata.org/wiki/Q131089“,”display_name“:”大地测量学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C178790620,“wikidata”:https://www.wikidata.org/wiki/Q11351“,”display_name“:”有机化学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C169760540,“wikidata”:https://www.wikidata.org/wiki/Q207011“,”display_name“:”Neuroscience“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C95623464,“wikidata”:https://www.wikidata.org/wiki/Q1096149“,”display_name“:”分类器(UML)“,”level“:2,”score“:0.0},{”id“:”https://openalex.org/C120665830,“wikidata”:https://www.wikidata.org/wiki/Q14620“,”display_name“:”Optics“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C199360897,“wikidata”:https://www.wikidata.org/wiki/Q9143“,”display_name“:”编程语言“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C205649164,“wikidata”:https://www.wikidata.org/wiki/Q1071“,”display_name“:”Geography“,”level“:0,”score“:0.0}],”mesh“:[],”locations_count“:2,”location“:[{”is_oa“:true,”landing_page_url“:”https://doi.org/10.18653/v1/2021.emnlp-main.833,“pdf_url”:https://aclantology.org/2021.emnlp-main.833.pdf,“源”:{“id”:https://openalex.org/S4363608991“,”display_name“:”2021年自然语言处理实证方法会议记录“,”issn_l“:null,”issn“:nul,”is_oa“:false,”is_ in_doaj“:false,”host_organization“:nuld,”hose_organization_name“:null,“host_orgganization_lineage”:[],“hosd_organisation_lineage_names”:[[],”type“:”Conference“},”license“:”cc-by“,”licence_id“:”https://openalex.org/licenses/cc-by“,”版本“:”已发布版本“,”is_accepted“:true,”is_published“:true},{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2109.04607,“pdf_url”:https://arxiv.org/pdf/2109.04607,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://doi.org/10.18653/v1/2021.emnlp-main.833,“pdf_url”:https://aclantology.org/2021.emnlp-main.833.pdf,“源”:{“id”:https://openalex.org/S4363608991“,”display_name“:“2021自然语言处理实证方法会议记录”,“issn_l”:null,“issn”:null,“is_oa”:false,“is_in_doaj”:false,“host_organization”:null,“host_organization_name”:null,“host_organization_lineage”:[],“host_organization_lineage_names”:[],“type”:“Conference”},“license”:“cc-by”,“license_id”:“https://openalex.org/licenses/cc-by“,”version“:”publishedVersion“,”is_accepted“:true,”is_published“:true},”sustainable_development_goals“:[{”id“:”https://metadata.un.org/sdg/4“,”display_name“:”素质教育“,”score“:0.73}],”grants“:[],”datasets“:【】,”versions“:【],”referenced_works_count“:17,”referrenced_works“:[”https://openalex.org/W2493916176“,”https://openalex.org/W2760505947“,”https://openalex.org/W2801887493“,”https://openalex.org/W2807333695“,”https://openalex.org/W2916132663“,”https://openalex.org/W2962784628“,”https://openalex.org/W2963341956“,”https://openalex.org/W2963716420“,”https://openalex.org/W2964078775“,”https://openalex.org/W2973089652“,”https://openalex.org/W3034238904“,”https://openalex.org/W3098466758“,”https://openalex.org/W3099008231“,”https://openalex.org/W3099950029“,”https://openalex.org/W3103727211“,”https://openalex.org/W3104186312“,”https://openalex.org/W3105601216“],”related_works“:[”https://openalex.org/W4378469273“,”https://openalex.org/W4376166954“,”https://openalex.org/W4312910505“,”https://openalex.org/W4283069728“,”https://openalex.org/W4281397339“,”https://openalex.org/W3213207129“,”https://openalex.org/W3199243620“,”https://openalex.org/W3116295307“,”https://openalex.org/W2152148513“,”https://openalex.org/W1963734729“],”ngrams_url“:”https://api.openalex.org/works/W3199243620/ngrams网站“,”abstract_inverted_index“:{”We“:[0],”present“:[1],”INDOBERTWEET“:[2],”the“:[3,41,53],”first“:[4],”largescale“:[5],”预处理“:[6],”model“:[7,20,30],”for“:[8,45,70],”Indonesian“:[9,18],”Twitter“:[10],”that“:[11,50],”is“:[12,64],”trained“:[13],”by“:[14],“扩展”:[15],“a”:[16],“单语训练”:[17],“BERT”:[19,42,55],“with”:[21,52],“添加剂”:[22],“特定领域”:[23],“词汇。我们“:[24],“焦点”:[25],“in”:[26,73],“special”:[27],“on”:[28],“efficient”:[29],“adaptation”:[31,72],“under”:[32],“词汇表”:[33,71],“不匹配”:[34],“and”:[35,63],“基准”:[36],“different”:[37],“ways”:[38],“of”:[39,75],“initializing”:[40,51],“嵌入”:[43,57],“图层”:[44],“新建”:[46],“单词”:[47],“类型”。We“:[48],”find“:[49],”average“:[54],”subword“:[P6],”makes“:[58],”pretraining“:[59],”five“:[60],”times“:/61],”更快“:[62],”more“:[65],”effect“:[66],”than“:[67],”proposed“:[68],”methods“:[69],”terms“:[74],”extrinsic“:[76],”evaluation“:[77],”over“:[78],”七个“:[79],”基于推特“:[80],”数据集。“:[81],”1“:[82]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W3199243620“,”counts_by_year“:[{“年份”:2024,”cited_by_count“:2},{“年度”:2023,”ciped_by_cunt“:11},”{“年”:2022,“cited_by_count”:7}],”updated_date“:”2024-05-23T08:44.44.979076“,”created_date:“2021-09-27”}