{“id”:“https://openalex.org/W4387561365“,”doi“:”https://doi.org/10.48550/arxiv.2310.06694“,”title“:”Shered LLaMA:“通过结构化修剪加速语言模型预训练”,“display_name”:“Sheed LLaMA:通过结构化修剪加快语言模型预培训”,“publication_year”:2023,“publiation_date”:“2023-01-01”,“ids”:{“openalex”:“https://openalex.org/W4387561365“,”doi“:”https://doi.org/10.48550/arxiv.2310.06694“},”language“:”en“,”primary_location“:{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2310.06694“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”type“:”preprint“,”type_crossref“:“posted-content”,”indexed_in“:[”arxiv“,”datacite“],”open_access“:{”is_oa“:true,”oa_status“:”green“,”oa_url“:”https://arxiv.org/abs/2310.06694“,”any_repository_has_fulltext“:true},”authorships“:[{”author_position“:”第一“,”作者“:{”id“:”https://openalex.org/A5003465314“,”display_name“:”Mengzhou Xia“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Xia,Mengzhua“,”raw _affiliation_strings“:]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5006863331“,”display_name“:”Tianyu Gao“,”orcid“:null},”institutions“:[],”countries“:[],”is_corresponding“:false,”raw_author_name“:”Gao,Tianyue“,”raw _affiliation_strings“:]},{”author_position“:”middle“,”author“:{”id“:”https://openalex.org/A5008306856“,”display_name“:”曾志远“,”orcid“:”https://orcid.org/0000-0001-7483-1438“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”曾致远“,”raw关联字符串“:[]},{“author_position”:“last”,“author”:{“id”:“https://openalex.org/A5051064208“,”display_name“:”陈丹琪“,”orcid“:”https://orcid.org/0000-0002-6226-6838“},”机构“:[],”国家“:[[],”is_corresponding“:false,”raw_author_name“:”陈丹奇“,”raw关联字符串“:[]}],”countries_distict_count“:0,”机构区分计数“:0,“cited_by_percentile_year“:{“min”:0,“max”:78},“biblio”:{卷:null,“问题”:nullhttps://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9979,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},”主题“:[{”id“:”https://openalex.org/T10028“,”display_name“:”自然语言处理“,”score“:0.9979,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}},{”id“:”https://openalex.org/T10181“,”display_name“:”统计机器翻译和自然语言处理“,”score“:0.9971,”subfield“:{”id“:”https://openalex.org/subfields/1702“,”display_name“:”人工智能“},”字段“:{”id“:”https://openalex.org/fields/17“,”display_name“:”Computer Science“},”domain“:{”id“:”https://openalex.org/domains/3“,”display_name“:”物理科学“}}],”关键词“:[{”id“:”https://openalex.org/keywords/preained-models“,”display_name“:”预训练模型“,”score“:0.580841},{”id“:”https://openalex.org/keywords/language-modeling“,”display_name“:”语言建模“,”score“:0.580554},{”id“:”https://openalex.org/keywords/topic-modeling网站“,”display_name“:”主题建模“,”score“:0.564647},{”id“:”https://openalex.org/keywords/neural-machine-translation网站“,”display_name“:”神经机器翻译“,”score“:0.517265},{”id“:”https://openalex.org/keywords/多语言-神经-机器翻译“,”display_name“:”多语言神经机器翻译“,”score“:0.51133}],”concepts“:[{”id“:”https://openalex.org/C108010975,“wikidata”:https://www.wikidata.org/wiki/Q500094“,”display_name“:”修剪“,”级别“:2,”分数“:0.90770626},{”id“:”https://openalex.org/C2780586970,“wikidata”:https://www.wikidata.org/wiki/Q1357284“,”display_name“:”人气“,”level“:2,”score“:0.64850986},{”id“:”https://openalex.org/C41008148,“wikidata”:https://www.wikidata.org/wiki/Q21198(网址:https://www.wikidata.org/wiki/Q21198)“,”display_name“:”计算机科学“,”level“:0,”score“:0.63681555},{”id“:”https://openalex.org/C137293760,“wikidata”:https://www.wikidata.org/wiki/Q3621696“,”display_name“:”语言模型“,”level“:2,”score“:0.61430585},{”id“:”https://openalex.org/C26517878,“wikidata”:https://www.wikidata.org/wiki/Q228039“,”display_name“:”Key(lock)“,”level“:2,”score“:0.5406136},{”id“:”https://openalex.org/C2781235140,“wikidata”:https://www.wikidata.org/wiki/Q275131“,”display_name“:”Scratch“,”level“:2,”score“:0.4867063},{”id“:”https://openalex.org/C204323151,“wikidata”:https://www.wikidata.org/wiki/Q905424“,”“display_name”“:”范围(航空)“,”级别“:2,”分数“:0.41397518},{”id“:”https://openalex.org/C119857082,“wikidata”:https://www.wikidata.org/wiki/Q2539“,”display_name“:”机器学习“,”level“:1,”score“:0.40917283},{”id“:”https://openalex.org/C154945302,“wikidata”:https://www.wikidata.org/wiki/Q11660“,”display_name“:”人工智能“,”level“:1,”score“:0.3259732},{”id“:”https://openalex.org/C127413603,“wikidata”:https://www.wikidata.org/wiki/Q11023“,”display_name“:”Engineering“,”level“:0,”score“:0.12786537},{”id“:”https://openalex.org/C199360897,“wikidata”:https://www.wikidata.org/wiki/Q9143“,”display_name“:”编程语言“,”level“:1,”score“:0.09991759},{”id“:”https://openalex.org/C15744967,“wikidata”:https://www.wikidata.org/wiki/Q9418“,”display_name“:”心理学“,”等级“:0,”分数“:0.093771785},{”id“:”https://openalex.org/C86803240,“wikidata”:https://www.wikidata.org/wiki/Q420“,”display_name“:”生物学“,”等级“:0,”分数“:0.07291639},{”id“:”https://openalex.org/C77805123,“wikidata”:https://www.wikidata.org/wiki/Q161272“,”display_name“:”社会心理学“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C38652104,“wikidata”:https://www.wikidata.org/wiki/Q3510521“,”display_name“:”计算机安全“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C146978453,“wikidata”:https://www.wikidata.org/wiki/Q3798668“,”display_name“:”航空航天工程“,”level“:1,”score“:0.0},{”id“:”https://openalex.org/C6557445,“wikidata”:https://www.wikidata.org/wiki/Q173113“,”display_name“:”Agronomy“,”level“:1,”score“:0.0}],”mesh“:[],”locations_count“:2,”location“:[{”is_oa“:true,”landing_page_url“:”https://arxiv.org/abs/2310.06694“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},{”is_oa“:false,”landing_page_url“:”https://api.datacite.org/dois/10.48550/arxiv.2310.06694“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4393179698“,”display_name“:”DataCite API“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I4210145204“,”“host_organization_name”:“DataCite”,“host_organization_lineage”:[“https://openalex.org/I4210145204“],”host_organization_lineage_names“:[”DataCite“],“type”:“metadata”},“license”:null,“licence_id”:null,“version”:null}],“best_oa_location”:{“is_oa”:true,“landing_page_url”:“https://arxiv.org/abs/2310.06694“,”pdf_url“:空,”源“:{”id“:”https://openalex.org/S4306400194“,”display_name“:”arXiv(康奈尔大学)“,”issn_l“:null,”issn“:null,”is_oa“:true,”is_ in_doaj“:false,”host_organization“:”https://openalex.org/I205783295“,”“host_organization_name”:“康奈尔大学”,“host_organization_lineage”:[“https://openalex.org/I205783295“],”host_organization_lineage_names“:[”康奈尔大学“],“type”:“repository”},“license”:“other-oa”,“licence_id”:“https://openalex.org/licenses/other-oa网站“,”version“:”submittedVersion“,”is_accepted“:false,”is_published“:false},”sustainable_development_goals“:[{”id“:”https://metadata.un.org/sdg/4“,”display_name“:”素质教育“,”score“:0.76}],”grants“:[],”datasets“:【】,”versions“:【],”referenced_works_count“:0,”referrenced_works“:[],”related_work斯“:[”https://openalex.org/W2770018148","https://openalex.org/W2518037665","https://openalex.org/W2477036161","https://openalex.org/W2475116013","https://openalex.org/W2385135707","https://openalex.org/W2384861574","https://openalex.org/W2368605798","https://openalex.org/W2368049389","https://openalex.org/W2358308169","https://openalex.org/W2348524959“],”ngrams_url“:”https://api.openalex.org/works/W4387561365/ngrams“,”“abstract_inverted_index”:{“The”:[0],“popularity”:[1],“of”:[2,20,29,37103121145159169],“LLaMA”:[3],“(Touvron”:[4],“et”:[5],“al.”:[6],“2023a;b)”:[7],“and”:[8,84,86,93136152161],“other”:/9],“recently”:[10],“emerged”:[11],“mediate-size”:[12],“大型“:[13],”语言“:[14],”模型“:[15,32140144175],”(LLM)“:[16],”突出显示“:[17],”“:[18,27101119126130],”潜在“:[19],”建筑“:[21197],”较小“:[22,54198],”尚未“:[23],”强大“:[24],”LLM。“:[25199],”“不管”“:[26],”成本“:[28],”培训“:[30108173],”这样“:[31148174],”来自“:[33,56176],”刮擦“:[34],”开启“:[35111155],”万亿“:[36],”代币“:[38],”保持“:[39],”高。“:[40],“In”:[41],“this”:[42],“work”,:[43],“we”:%44],“study”:[45],“structured”:[46,68188],“pruning”:[47129189],“as”:[48149],“an”:[49,90],“effectived”:【50】,“means”:【51】,“to”:[52,75134172],“develop”:[P3],“LLM”:[55186],“pre-dired”,“:[57],“larger”:[58,73],“模型。“:[59],”我们的“:[60],”方法“:[6123195],”采用“:[62],”两个“:[63],”关键“:[64],”技术:“:[65],”(1)“:[66],”目标“:[67],”修剪“:[69],”哪个“:[70,98],”修剪“:[71],”a“:[72,756156191],”模型“:[74132],”指定“:[77],”目标“:[78],”形状“:[79],”by“:[80124],”删除“:[81],”层,“:[82],”头,“:[83],”中间“:[85],”隐藏“:[87],”维度“:[88],”in“:[89106],”端到端“:[91],”方式“:[92],”(2)“:[94],”动态“:[95],”批次“:[96109],”加载“:[97],”动力学“:[99],”更新“:[100],”组成“:[102],”采样“:[104],”数据“:[105],”每个“:[107],”基于“:[110],”变化“:[112],”损失“:[113],”交叉“:[114],“不同”:[115],“域”。“:[116],“我们”:[117],“证明”:[118],“疗效”:[120],“我方”:[122],“呈现”:[125],“Shered-LLaMA”:[127139],“系列”,“:[128],“LLaMA2-7B”:[131],“向下”:[133],“1.3B”:%135],“2.7B”:+137],“参数”。“:[138],”优于“:[141],”最先进“:[142],”开源“:[143],”等价“:[146],”大小“,:[147],”皮提亚“:[150],”INCITE“:[151],”OpenLLaMA“:[153],”模型“:[154],”宽“:[157],”范围“:[158],”下游“:[160],”指令“:[162],”调谐“:[163],”评估“:[164],”while“:[165],”要求“:[166],”仅“:[167],”3%“:[168],”计算“:[170],”比较了“:[171],”scratch。“:[177],”This“:[178],”work“:[179],”provides“:[180],”imputerative“:[181],”evidence“:[182],”that“:[183],”leverage“:[184],”existing“:[185],”with“:%187],”is“:%190],”far“:[192],”more“:[193],”cost-of-cost“:[194],”for“:[196]},”cited_by_api_url“:”https://api.openalex.org/works?filter=cites:W4387561365“,”counts_by_year“:[],”updated_date“:”2024-05-25T19:33:40.376824“,”created_date:“2023-10-12”}“