diff --git a/README.md b/README.md
index 87b9554..14bdd26 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,7 @@ The following table shows the supported models with sizes and the tasks that the
 | GPT-NeoX | 20B | Pretrained |
 | GPT-Neo | 1.3B | Pretrained |
 | GPT-J | 6B | Pretrained |
+| RWKV | 169M, 430M, 1.5B, 3B, 7B, 14B | Pretrained |
 | Incoder | 6B | Pretrained |
 | CodeParrot | Small-python (110M), Small-multi(110M), 1.5B | Pretrained |
 | CodeBERT | CodeBERT-base, UnixCoder-base, CodeBERTa-small | Pretrained |
@@ -157,6 +158,12 @@ print(model_zoo)
 # codegen2-3.7B pretrained
 # codegen2-7B pretrained
 # codegen2-16B pretrained
+# rwkv-169M pretrained
+# rwkv-430M pretrained
+# rwkv-1.5B pretrained
+# rwkv-3B pretrained
+# rwkv-7B pretrained
+# rwkv-14B pretrained
 # codet5 base-multi-sum pretrained
 # base nl2code
 # base refine
diff --git a/codetf/configs/inference/causal_lm.yaml b/codetf/configs/inference/causal_lm.yaml
index 201baad..0427df3 100644
--- a/codetf/configs/inference/causal_lm.yaml
+++ b/codetf/configs/inference/causal_lm.yaml
@@ -68,4 +68,29 @@ causallm-codegen2-7B-pretrained:
 causallm-codegen2-16B-pretrained:
   huggingface_url: "Salesforce/codegen2-16B"
   tokenizer_url: "Salesforce/codegen2-16B"
+  max_prediction_length: 512
+# Every RWKV size below uses the RWKV-4 "-pile" base checkpoint so the weights match the "pretrained" key (the rwkv-raven-* repos are instruction-tuned variants).
+causallm-rwkv-169M-pretrained:
+  huggingface_url: "RWKV/rwkv-4-169m-pile"
+  tokenizer_url: "RWKV/rwkv-4-169m-pile"
+  max_prediction_length: 512
+causallm-rwkv-430M-pretrained:
+  huggingface_url: "RWKV/rwkv-4-430m-pile"
+  tokenizer_url: "RWKV/rwkv-4-430m-pile"
+  max_prediction_length: 512
+causallm-rwkv-1.5B-pretrained:
+  huggingface_url: "RWKV/rwkv-4-1b5-pile"
+  tokenizer_url: "RWKV/rwkv-4-1b5-pile"
+  max_prediction_length: 512
+causallm-rwkv-3B-pretrained:
+  huggingface_url: "RWKV/rwkv-4-3b-pile"
+  tokenizer_url: "RWKV/rwkv-4-3b-pile"
+  max_prediction_length: 512
+causallm-rwkv-7B-pretrained:
+  huggingface_url: "RWKV/rwkv-4-7b-pile"
+  tokenizer_url: "RWKV/rwkv-4-7b-pile"
+  max_prediction_length: 512
+causallm-rwkv-14B-pretrained:
+  huggingface_url: "RWKV/rwkv-4-14b-pile"
+  tokenizer_url: "RWKV/rwkv-4-14b-pile"
   max_prediction_length: 512
\ No newline at end of file