cyankiwi
/

Jan-code-4b-AWQ-8bit

Text Generation

text-generation-inference

compressed-tensors

Model card Files Files and versions

Jan-code-4b-AWQ-8bit / recipe.yaml

cpatonn's picture

Upload folder using huggingface_hub

4610d26 verified 16 days ago

history blame contribute delete

1.31 kB

	default_stage:
	default_modifiers:
	AWQModifier:
	config_groups:
	group_0:
	targets: [Linear]
	weights:
	num_bits: 8
	type: int
	symmetric: true
	group_size: 32
	strategy: group
	block_structure: null
	dynamic: false
	actorder: null
	scale_dtype: null
	zp_dtype: null
	observer: mse
	observer_kwargs: {}
	input_activations: null
	output_activations: null
	format: null
	targets: [Linear]
	ignore: [lm_head]
	bypass_divisibility_checks: false
	mappings:
	- smooth_layer: re:.*input_layernorm$
	balance_layers: ['re:.q_proj$', 're:.k_proj$', 're:.*v_proj$']
	activation_hook_target: null
	- smooth_layer: re:.*v_proj$
	balance_layers: ['re:.*o_proj$']
	activation_hook_target: null
	- smooth_layer: re:.*post_attention_layernorm$
	balance_layers: ['re:.gate_proj$', 're:.up_proj$']
	activation_hook_target: null
	- smooth_layer: re:.*up_proj$
	balance_layers: ['re:.*down_proj$']
	activation_hook_target: null
	offload_device: !!python/object/apply:torch.device [cuda]
	duo_scaling: true
	n_grid: 20