Training Script Stability 3B and 7B

`# Developed by Aamir Mirza
# create a conda virtual environment python 3.9
# install PyTorch 1.13.1 ( not 2.0)
# conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia
# install the latest transformers
# conda install -c conda-forge transformers
# install deepspeed  from GitHub not pip install
# build deepspeed with CPU Adam optimiser support like this
# git clone https://github.com/microsoft/DeepSpeed
# DS_BUILD_CPU_ADAM=1 pip install .
# install accelerate via pip
#  pip  install Ninja
# conda install -c conda-forge mpi4py
# train via commandline for example
#  deepspeed --num_gpus=2 train_gptNX_v2.py
# In my case I have 2x 3090 24GB
from transformers import GPTNeoXForCausalLM, GPTNeoXTokenizerFast, TextDataset, \
    DefaultDataCollator, DataCollatorForLanguageModeling, DataCollatorWithPadding
from transformers import Trainer, TrainingArguments
from datasets import load_dataset
import os

# Environment switches for the distributed run. They are set before any
# model/tokenizer work happens further down in the script.
os.environ['OMPI_MCA_opal_cuda_support'] = 'true'
# Turn off the tokenizers library's internal parallelism.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
# Default to two processes (one per GPU). A WORLD_SIZE already exported by the
# launcher takes precedence, so a single-GPU launch is not overridden; when
# running without a launcher on one GPU, export WORLD_SIZE=1 or change this.
os.environ.setdefault("WORLD_SIZE", "2")

# Maximum number of tokens per example passed to the tokenizer.
# Change this to your requirement for example 4096 (MAX)
MAX_LEN = 1024

# Reference DeepSpeed ZeRO stage-2 configuration (CPU-offloaded optimizer).
# The script itself does not read this string; save it as a JSON file and point
# the `deepspeed=` training argument at that file. Values given as "auto" are
# filled in by the Hugging Face Trainer from its TrainingArguments.
# The loss-scaling options belong to the "fp16" section, which also matches the
# fp16=True training argument used by this script.
stage2_config = """{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },

    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "betas": "auto",
            "eps": "auto",
            "weight_decay": "auto"
        }
    },

    "scheduler": {
        "type": "WarmupLR",
        "params": {
            "warmup_min_lr": "auto",
            "warmup_max_lr": "auto",
            "warmup_num_steps": "auto"
        }
    },

    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "allgather_partitions": true,
        "allgather_bucket_size": 2e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 2e8,
        "contiguous_gradients": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto"
} """


class CustomTrainer(Trainer):
    """Trainer whose loss uses the input ids themselves as causal-LM labels."""

    def compute_loss(self, model_a, inputs_a, return_outputs=False, num_items_in_batch=None):
        """Run the model on a batch and return its language-modeling loss.

        Args:
            model_a: The model to call; it must accept a ``labels`` keyword
                and return an object exposing ``.loss``.
            inputs_a: Mapping of model inputs; must contain ``"input_ids"``
                and may contain ``"attention_mask"``.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: Accepted because newer Trainer versions pass
                this keyword to ``compute_loss``; it is not used here.

        Returns:
            The loss, or a ``(loss, outputs)`` tuple if ``return_outputs``.
        """
        labels = inputs_a["input_ids"]
        attention_mask = inputs_a.get("attention_mask")
        if attention_mask is not None:
            # The pad token is the EOS token, so padding cannot be recognised
            # by id. Use the attention mask and mark padded positions with
            # -100, the label value the loss ignores.
            labels = labels.masked_fill(attention_mask == 0, -100)

        # Copy so the caller's batch is left untouched and any pre-existing
        # "labels" entry is replaced rather than passed twice.
        model_inputs = dict(inputs_a)
        model_inputs["labels"] = labels

        outputs = model_a(**model_inputs)
        loss = outputs.loss
        return (loss, outputs) if return_outputs else loss


tokenizer = GPTNeoXTokenizerFast.from_pretrained("stabilityai/stablelm-base-alpha-3b")
# Padding (in the tokenization step and in the padding data collator) needs a
# pad token. process_data_add_mask assigns the same value, but only as a side
# effect of being run; set it here so it holds as soon as the tokenizer exists.
tokenizer.pad_token = tokenizer.eos_token


def process_data(examples):
    """Filter a batch of texts by length and append a space plus newline.

    A text is kept only if it is longer than 16 and shorter than 512
    characters and is not made up solely of whitespace. Every kept text gets
    a trailing space followed by a newline character.

    Args:
        examples: Batch mapping with a ``"text"`` list of strings.

    Returns:
        The same mapping object, with ``"text"`` replaced by the filtered and
        terminated texts.
    """
    def _keep(line):
        # len > 16 already rules out empty strings.
        return 16 < len(line) < 512 and not line.isspace()

    examples["text"] = [line + ' ' + '\n' for line in examples["text"] if _keep(line)]
    return examples


# process dataset columns [text] use tokenizer to get input_ids and attention mask
def process_data_add_mask(examples):
    """Tokenize the ``text`` column of a batch and attach the encodings.

    Uses the module-level ``tokenizer`` with padding and truncation enabled
    and ``MAX_LEN`` as the maximum length.

    Args:
        examples: Batch mapping containing a ``'text'`` entry.

    Returns:
        The same mapping, with ``'input_ids'`` and ``'attention_mask'`` added.
    """
    batch_text = examples['text']
    # Padding needs a pad token; reuse the end-of-sequence token for it.
    tokenizer.pad_token = tokenizer.eos_token
    encoding = tokenizer(
        batch_text,
        max_length=MAX_LEN,
        truncation=True,
        padding=True,
    )
    # Copy the two tokenizer outputs onto the batch.
    for column in ('input_ids', 'attention_mask'):
        examples[column] = encoding[column]
    return examples


# Load IMDB, then for each split: shuffle, filter/terminate the texts with
# process_data (dropping the 'label' column), and finally tokenize with
# process_data_add_mask (dropping the raw 'text' column).
imdb_dataset = load_dataset('imdb')

imdb_dataset_train = (
    imdb_dataset['train']
    .shuffle()
    .map(process_data, batched=True, remove_columns=['label'])
)
# The 'test' split is used as the evaluation set.
imdb_dataset_val = (
    imdb_dataset['test']
    .shuffle()
    .map(process_data, batched=True, remove_columns=['label'])
)

train_dataset = imdb_dataset_train.map(
    process_data_add_mask,
    batched=True,
    remove_columns=["text"],
)
val_dataset = imdb_dataset_val.map(
    process_data_add_mask,
    batched=True,
    remove_columns=["text"],
)
strs = " "

# Pretrained causal language model to fine-tune.
model = GPTNeoXForCausalLM.from_pretrained("stabilityai/stablelm-base-alpha-3b")

# absolute path required for deepspeed config
# you can use the JSON above to create your own config
z_optimiser = '/two-tb/train_GPTNX/zeromq_config/stablelm-base-alpha-3b_config.json'

# Collator that pads each batch with the tokenizer and returns PyTorch tensors.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")

training_args_v2 = TrainingArguments(
    # Output location and checkpoint retention.
    output_dir="./trained_model",
    save_total_limit=2,
    # Precision and DeepSpeed configuration file.
    fp16=True,
    deepspeed=z_optimiser,
    # Optimisation schedule.
    learning_rate=2e-5,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    # Evaluation runs once per epoch.
    per_device_eval_batch_size=12,
    evaluation_strategy="epoch",
)

# Set up the trainer
trainer = CustomTrainer(
    args=training_args_v2,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)
trainer.train()
# trainer.save_model()
`

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Training Script Stability 3B and 7B #72

create a conda virtual environment python 3.9

install PyTorch 1.13.1 ( not 2.0)

conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia

install the latest transformers

conda install -c conda-forge transformers

install deepspeed from GitHub not pip install

build deepspeed with CPU Adam optimiser support like this

git clone https://github.com/microsoft/DeepSpeed

DS_BUILD_CPU_ADAM=1 pip install .

accelerate via pip

pip install Ninja

conda install -c conda-forge mpi4py

train via commandline for example

deepspeed --num_gpus=2 train_gptNX_v2.py

In my case I have 2x 3090 24GB

If you got a single GPU then change this to one

Change this to your requirement for example 4096 (MAX)

process dataset columns [text] use tokenizer to get input_ids and attention mask

absolute path required for deepspeed config

you can use the JSON above to create your own config

Set up the trainer

trainer.save_model()

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Training Script Stability 3B and 7B #72

Description

create a conda virtual environment python 3.9

install PyTorch 1.13.1 ( not 2.0)

conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia

install the latest transformers

conda install -c conda-forge transformers

install deepspeed from GitHub not pip install

build deepspeed with CPU Adam optimiser support like this

git clone https://github.com/microsoft/DeepSpeed

DS_BUILD_CPU_ADAM=1 pip install .

accelerate via pip

pip install Ninja

conda install -c conda-forge mpi4py

train via commandline for example

deepspeed --num_gpus=2 train_gptNX_v2.py

In my case I have 2x 3090 24GB

If you got a single GPU then change this to one

Change this to your requirement for example 4096 (MAX)

process dataset columns [text] use tokenizer to get input_ids and attention mask

absolute path required for deepspeed config

you can use the JSON above to create your own config

Set up the trainer

trainer.save_model()

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions