jerryzh168 committed
Commit 590fe63 · verified · Parent: b6f1a63

Upload README.md with huggingface_hub

Files changed (1): README.md (+3 −3)
README.md CHANGED
````diff
@@ -117,9 +117,9 @@ print("content:", content)
 
 Install the required packages:
 ```Shell
+pip install torch
 pip install git+https://github.com/huggingface/transformers@main
 pip install --pre torchao --index-url https://download.pytorch.org/whl/nightly/cu126
-pip install torch
 pip install accelerate
 ```
 
@@ -135,12 +135,12 @@ model_to_quantize = "google/gemma-3-12b-it"
 
 
 from torchao.quantization import Int4WeightOnlyConfig
-quant_config = Int4WeightOnlyConfig(group_size=128, use_hqq=True)
+quant_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="tile_packed_to_4d", int4_choose_qparams_algorithm="hqq")
 quantization_config = TorchAoConfig(quant_type=quant_config)
-
 quantized_model = AutoModelForCausalLM.from_pretrained(model_to_quantize, device_map="auto", torch_dtype=torch.bfloat16, quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
+
 # Push to hub
 USER_ID = "YOUR_USER_ID"
 MODEL_NAME = model_id.split("/")[-1]
````
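In the first hunk, `pip install torch` simply moves ahead of the torchao nightly install, presumably so a torch build is already in place when the nightly torchao wheel is resolved. The second hunk tracks a torchao API change: the boolean `use_hqq=True` flag is replaced by the explicit `int4_packing_format` and `int4_choose_qparams_algorithm` arguments of `Int4WeightOnlyConfig`. As a minimal sketch, the updated quantization flow assembled from the snippets in this diff looks like the following; the imports and the reuse of `model_to_quantize` for the tokenizer are assumptions, since those lines sit outside the changed hunks:

```Python
# Minimal sketch of the updated flow, assembled from this diff.
# Assumes transformers@main and a torchao nightly whose Int4WeightOnlyConfig
# accepts int4_packing_format / int4_choose_qparams_algorithm.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
from torchao.quantization import Int4WeightOnlyConfig

model_to_quantize = "google/gemma-3-12b-it"

# HQQ selects the quantization parameters; weights are stored as int4 in the
# 4D tile-packed layout named by int4_packing_format.
quant_config = Int4WeightOnlyConfig(
    group_size=128,
    int4_packing_format="tile_packed_to_4d",
    int4_choose_qparams_algorithm="hqq",
)
quantization_config = TorchAoConfig(quant_type=quant_config)

# Quantization happens on load, driven by the config above.
quantized_model = AutoModelForCausalLM.from_pretrained(
    model_to_quantize,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_to_quantize)
```

Pushing the quantized checkpoint to the hub then proceeds exactly as in the unchanged `USER_ID` / `MODEL_NAME` lines of the README.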