{“id”:“https://openalex.org/W4221164116网址“,”doi“:”https://doi.org/10.48550/arxiv.2203.03466“,”title“:“张量程序V:通过零炮超参数传输调整大型神经网络”,“display_name”:“张量程序V:使用零炮超参传输调整大型神经元网络”,”publication_year“:2022,”publitation_date“:”2022-01-01“,”ids“:{”openalex“:”https://openalex.org/W4221164116网址“,”doi“:”https://doi.org/10.48550/arxiv.2203.03466“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2203.03466“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2203.03466“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A506111557“,”display_name“:”Greg Yang“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Yang,Greg“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5025108863“,”display_name“:”Edward J.Hu“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Hu,Edward J“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5085244021“,”display_name“:”Igor Babuschkin“,”orcid“:”https://orcid.org/0000-0001-5156-5333“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”Babuschkin,Igor“,”raw_affiliation_strings“:[],“从属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5051574174“,”display_name“:”Szymon Sidor“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Sidor,Szyman“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5035168940“,”display_name“:”刘晓东“,”兽人“:”https://orcid.org/0000-0002-3733-451X“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”刘,晓东“,”raw关联字符串“:[],“隶属关系”:[]},{“author_position”:“middle”,“author”:{“id”:“https://openalex.org/A5025331564“,”display_name“:”David Farhi“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Farhi,David“,”raw _affiliation_strings“:],”affiliations“:[/]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5072061934“,”display_name“:”N.C.Ryder“,”orcid“:null},”institutions“:[],”countries“:[],”is_correresponsing“:false,”raw_author_name“:”Ryder,Nick“,”raw_affiation_strings“:[],”affiliations“:[]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5011053473“,”display_name“:”Jakub Pachocki“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Pachocky,Jakub“,”raw _affiliation_strings“:],”affiliations“:【】},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5051745436“,”display_name“:”Weizhu Chen“,”orcid“:null},”institutions“:[],”countries“:[],”is_correresponsing“:false,”raw_author_name“:”Chen,Weizhu“,”raw_affiation_strings“:[],”affiliations“:[]},{”author_position“:”last“,”author“:{”id“:”https://openalex.org/A5047233371“,”display_name“:”尖峰高“,”兽人“:”https://orcid.org/0000-0002-6371-505X“},”机构“:[],”国家“:[],”is_correresponsing“:false,”raw_author_name“:”Gao,Jianfeng“,”raw_affiation_strings“:[],”附属机构“:[]}],”国家_地区_计数“:0,”机构_地区_计数“:0,”对应机构_授权人“:[],”对应机构_ ID“:[],”apc_list“:null,”apc_payed“:null,”fwci“:1.566,”has_fulltext“:false,”cited_by_count“:3,”cited_by_percentile_year“:{“min”:81,“max”:84},“biblio”:{”volume“:null,“issue”:null、“first_page”:null,“last_page”:null},”is_retracted“:false,”is_paratext“:false,”primary_topic“:{”id“:”https://openalex.org/T10054“,”display_name“:”并行计算与性能优化“,”score“:0.9844,”subfield“:{”id“:”https://openalex.org/subfields/1708“,”display_name“:”硬件和体系结构“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10054“,”display_name“:”并行计算与性能优化“,”score“:0.9844,”subfield“:{”id“:”https://openalex.org/subfields/1708“,”display_name“:”硬件和体系结构“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10036“,”display_name“:”计算机视觉和图像识别深度学习“,”score“:0.9815,”subfield“:{”id“:”https://openalex.org/subfields/1707“,”display_name“:”计算机视觉和模式识别“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T13650“,”“display_name”:“使用Python进行科学计算和数据分析”,“score”:0.9806,“subfield”:{“id”:“https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/hig-performance-computing(https://openalex.org/关键字/高性能计算)“,”display_name“:”高性能计算“,”score“:0.54321},{”id“:”https://openalex.org/keywords/performance-optimization网站“,”display_name“:”性能优化“,”score“:0.52262},{”id“:”https://openalex.org/keywords/dep-learning网站“,”display_name“:”深度学习“,”score“:0.517067},{”id“:”https://openalex.org/keywords/gpu-computing网站“,”display_name“:”GPU计算“,”score“:0.516028},{”id“:”https://openalex.org/keywords/异构计算“,”display_name“:”异构计算“,”score“:0.503486}],”concepts“:[{”id“:”https://openalex.org/C8642999,“wikidata”:https://www.wikidata.org/wiki/Q417168“,”display_name“:”Hyperparameter“,”level“:2,”score“:0.94581366},{”id“:”https://openalex.org/C02887219,“wikidata”:https://www.wikidata.org/wiki/Q3895221“,”display_name“:”参数化(大气建模)“,”level“:3,”score“:0.7962856},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198“,”display_name“:”计算机科学“,”level“:0,”score“:0.71374893},{”id“:”https://openalex.org/C50644808,“wikidata”:https://www.wikidata.org/wiki/Q192776“,”display_name“:”人工神经网络“,”level“:2,”score“:0.49076948},{”id“:”https://openalex.org/C2776175482,“wikidata”:https://www.wikidata.org/wiki/Q1195816“,”display_name“:”传输(计算)“,”级别“:2,”分数“:0.47457552},{”id“:”https://openalex.org/C150899416,“wikidata”:https://www.wikidata.org/wiki/Q1820378“,”display_name“:”学习迁移“,”level“:2,”score“:0.4510781},{”id“:”https://openalex.org/C2780813799,“wikidata”:https://www.wikidata.org/wiki/Q3274237“,”display_name“:”Zero(语言学)“,”level“:2,”score“:0.43934798},{”id“:”https://openalex.org/C11413529,“wikidata”:https://www.wikidata.org/wiki/Q8366“,”display_name“:”Algorithm“,”level“:1,”score“:0.42225522},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.39024457},{”id“:”https://openalex.org/C173608175,“wikidata”:https://www.wikidata.org/wiki/Q232661“,”display_name“:”并行计算“,”level“:1,”score“:0.1887433},{”id“:”https://openalex.org/C121332964,“wikidata”:https://www.wikidata.org/wiki/Q413“,”display_name“:”Physics“,”level“:0,”score“:0.173466233},{”id“:”https://openalex.org/C120665830,“wikidata”:https://www.wikidata.org/wiki/Q14620“,”display_name“:”Optics“,”level“:1,”score“:0.085938096},{”id“:”https://openalex.org/C74902906,“wikidata”:https://www.wikidata.org/wiki/Q1190858“,”display_name“:”辐射传输“,”level“:2,”score“:0.0},{”id“:”https://openalex.org/C41895202,“wikidata”:https://www.wikidata.org/wiki/Q8162“,”display_name“:”语言学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C138885662,“wikidata”:https://www.wikidata.org/wiki/Q5891“,”display_name“:”Philosophy“,”level“:0,”score“:0.0}],”mesh“:[],”locations_count“:3,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2203.03466“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:true,”landing_page_url“:”http://arxiv.org/abs/2203.03466,“pdf_url”:http://arxiv.org/pdf/203.03466,“源”:{“id”:https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”Cornell University“],“type”:“repository”},“license”:null,“licence_id”:null,“version”:“submittedVersion”,“is_accepted”:false,“is_published”:false},{“is_oa”:false,“landing_page_url”:“https://api.datacite.org/dois/10.48550/arxiv.2203.03466“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_in_doaj“:false,”is_core“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2203.03466“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is.in_doaj“:false,”is_core“:fase,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[{”score“:0.43,”id“:”https://metadata.un.org/sdg/9“,”display_name“:”工业、创新和基础设施“}],”grants“:[],”datasets“:【】,”versions“:【],”referenced_works_count“:0,”referrenced_works“:[],”related_work“:[”https://openalex.org/W4390421286","https://openalex.org/W4389724018","https://openalex.org/W4360995913","https://openalex.org/W4318719684","https://openalex.org/W4318559728","https://openalex.org/W4280563792","https://openalex.org/W4206657577","https://openalex.org/W3183136280","https://openalex.org/W2775233965","https://openalex.org/W2140186469“],”ngrams_url“:”https://api.openalex.org/works/W4221164116/ngrams网站“,”“abstract_inverted_index”:{“Hyperparameter”:[0],“(HP)”:[1],“tuning”:[2,47,77113136],“in”:[3,23,56],“deep”:[4],“learning”:[5],“is”:[6],“an”:[7],“priced”:[8],“process”:[9],“prebitively”:[10],“so”:[11],“for”:[12],“neural”:[13],“networks”:[14],”(NNs)“:[15],”with“:[1610135],”数十亿“:[17],”of“:[18,99106130140147],”parameters。“:[19],”我们“:[20,82],”show“:[21],”that“:[22],”the“:[24,53,59,71,78131],”recently“:[25],”discovered“:[26],”Maximal“:[27],”Update“:[28],”Parametrization“:[29],”(muP),“:[30],”many“:[31],”optimal“:[32],”HP“:[33,95],”remain“:[24],”stable“:[35],”even“:[36],”as“:[37],”型号“:[38,55,98],”尺寸“:[39],”更改。“:[40],”This“:[41],”leads“:[42],”to“:[43,70116],”a“:[44,63,97111],”new“:[45],”HP“:[46,60],”paradigm“:[48],”we“:[491021226],”call“:[50],”muTransfer“:[51],”parameterize“:[52],”target“:[54],”muP“:[57],”tune“:[58],”间接“:[61],”on“:[62,8 5],“较小”:[64],“模型”,“:[65,73134],”和“:[66,87155],”零发射“:[67],”转移“:[68],”它们“:[69],”全尺寸“:[72],”即“:[74],”无“:[75],”直接“:[76],”后者“:[79],”在“:[80153],”全部。“:[81],”verify“:[83],”muTransfer“:[84],”Transformer“:[86],”ResNet。“:[88],”For“:[89],”example,“:[90],”1)“:[91],”by“:[92121],”transferring“:[93122],”pretraining“:[94117142],”from“:[96123],”13M“:[100],”parameters“:[101125],”exeperformer“:[103127],”published“:[104128],”numbers“:[105129],”BERT-large“:[107118],”(350M“:[008],”parameters),“:[109],”总计“:[112141],”成本“:[114137],”等价物“:[115],”一次;“:[119],”2)“:[120],”40M“:[124],”6.7B“:[132],”GPT-3“:[133],”仅“:[138],”7%“:[139],”成本。“:[143],”A“:[144],”Pytorch“:[145],”implementation“:[146],”our“:[148],”technology“:[149],”can“:[150],”be“:[151],”find“:[152],”github.com/microsoft/mup“:[154],”installable“:[156],”via“:[157],”`pip“:[158],”安装“:[159],”mup`。“:[160]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4221164116“,”counts_by_year“:[{”年“:2023,”引用_by_count“:2}],”更新日期“:”2024-06-26T08:41:12.875541“,”创建日期“:“2022-04-03”}