Instructions to use mlx-community/Solar-Open-100B-6bit with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use mlx-community/Solar-Open-100B-6bit with MLX:

# Make sure mlx-lm is installed
# pip install --upgrade mlx-lm

# Generate text with mlx-lm
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/Solar-Open-100B-6bit")

prompt = "Write a story about Einstein"
messages = [{"role": "user", "content": prompt}]
prompt = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True
)

text = generate(model, tokenizer, prompt=prompt, verbose=True)

Notebooks
Google Colab
Kaggle
Local Apps
LM Studio

Pi new

How to use mlx-community/Solar-Open-100B-6bit with Pi:

Start the MLX server

# Install MLX LM:
uv tool install mlx-lm
# Start a local OpenAI-compatible server:
mlx_lm.server --model "mlx-community/Solar-Open-100B-6bit"

Configure the model in Pi

# Install Pi:
npm install -g @mariozechner/pi-coding-agent
# Add to ~/.pi/agent/models.json:
{
  "providers": {
    "mlx-lm": {
      "baseUrl": "http://localhost:8080/v1",
      "api": "openai-completions",
      "apiKey": "none",
      "models": [
        {
          "id": "mlx-community/Solar-Open-100B-6bit"
        }
      ]
    }
  }
}

Run Pi

# Start Pi in your project directory:
pi

Hermes Agent new

How to use mlx-community/Solar-Open-100B-6bit with Hermes Agent:

Start the MLX server

# Install MLX LM:
uv tool install mlx-lm
# Start a local OpenAI-compatible server:
mlx_lm.server --model "mlx-community/Solar-Open-100B-6bit"

Configure Hermes

# Install Hermes:
curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
hermes setup
# Point Hermes at the local server:
hermes config set model.provider custom
hermes config set model.base_url http://127.0.0.1:8080/v1
hermes config set model.default mlx-community/Solar-Open-100B-6bit

Run Hermes

hermes

MLX LM

How to use mlx-community/Solar-Open-100B-6bit with MLX LM:

Generate or start a chat session

# Install MLX LM
uv tool install mlx-lm
# Interactive chat REPL
mlx_lm.chat --model "mlx-community/Solar-Open-100B-6bit"

Run an OpenAI-compatible server

# Install MLX LM
uv tool install mlx-lm
# Start the server
mlx_lm.server --model "mlx-community/Solar-Open-100B-6bit"
# Calling the OpenAI-compatible server with curl
curl -X POST "http://localhost:8000/v1/chat/completions" \
   -H "Content-Type: application/json" \
   --data '{
     "model": "mlx-community/Solar-Open-100B-6bit",
     "messages": [
       {"role": "user", "content": "Hello"}
     ]
   }'

Solar-Open-100B-6bit / solar_open_tool_parser.py

kernelpool

Add files using upload-large-folder tool

698e721 verified 5 months ago

raw

history blame contribute delete

10.4 kB

	# coding=utf-8
	# Copyright 2025 Upstage AI.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import random
	import re
	import string
	import ast
	import json
	from collections.abc import Sequence
	from typing import Union, Tuple, List, Optional

	from vllm.entrypoints.openai.protocol import (
	ChatCompletionRequest,
	DeltaMessage,
	DeltaFunctionCall,
	DeltaToolCall,
	ExtractedToolCallInformation,
	ToolCall,
	FunctionCall,
	)
	from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
	ToolParser
	)
	from vllm.logger import init_logger

	import pyjson5

	class ToolCallID:
	_LENGTH = 10

	def __init__(self, id_val: str, validation: bool = False):
	self._id = id_val
	if validation:
	self._validate()

	@classmethod
	def random(cls, validation=False) -> 'ToolCallID':
	chars = string.ascii_lowercase + string.digits
	return cls(''.join(random.choice(chars) for _ in range(ToolCallID._LENGTH)), validation=validation)

	def _validate(self):
	assert len(self._id) == ToolCallID._LENGTH
	pattern = r'^[a-z0-9]{10}$'
	assert re.match(pattern, self._id) is not None

	def to_string(self) -> str:
	return self._id

	def __str__(self) -> str:
	return self.to_string()


	logger = init_logger(__name__)


	class SolarOpenToolParser(ToolParser):

	def extract_tool_calls(
	self,
	model_output: str,
	request: ChatCompletionRequest,
	) -> ExtractedToolCallInformation:
	content, tool_calls = self._parse_text(model_output)
	return ExtractedToolCallInformation(
	tools_called=len(tool_calls) > 0,
	tool_calls=tool_calls,
	content=content if content else None,
	)

	def extract_tool_calls_streaming(
	self,
	previous_text: str,
	current_text: str,
	delta_text: str,
	previous_token_ids: Sequence[int],
	current_token_ids: Sequence[int],
	delta_token_ids: Sequence[int],
	request: ChatCompletionRequest,
	) -> Union[DeltaMessage, None]:
	# 1) Emit plain content tokens immediately until content terminator
	# tags or tool_calls section begins. Be careful when tokenizer groups
	# multiple special tags into a single delta (e.g., "<\|tool_calls\|><\|tool_call:begin\|>").
	# Only emit as content if BOTH:
	# - previous_text has not seen any special markers, and
	# - delta_text does NOT contain any of those markers as a substring.
	if delta_text:
	# Do NOT emit content if we have already started any special section
	# including tool call tags. Content should only be emitted at the
	# very beginning before any markers show up.
	special_markers = (
	"<\|flush\|>",
	"<\|end\|>",
	"<\|begin\|>",
	"<\|tool_calls\|>",
	"<\|tool_call:begin\|>",
	"<\|tool_call:name\|>",
	"<\|tool_call:args\|>",
	"<\|tool_call:end\|>",
	"<\|calls\|>",
	)
	if not any(tag in previous_text for tag in special_markers):
	if not any(tag in delta_text for tag in special_markers):
	return DeltaMessage(content=delta_text, tool_calls=[])

	tool_call_deltas: list[DeltaToolCall] = []

	# Helper lambdas to analyze current_text state
	def _completed_calls_count(txt: str) -> int:
	return len(self._parse_tool_calls(txt))

	# Detect if a new tool_call started streaming its args just now.
	if delta_text and "<\|tool_call:args\|>" in delta_text:
	# Extract id and name for the latest tool call block present so far.
	begin_tag = "<\|tool_call:begin\|>"
	name_tag = "<\|tool_call:name\|>"
	args_tag = "<\|tool_call:args\|>"

	latest_args = current_text.rfind(args_tag)
	latest_name = current_text.rfind(name_tag, 0, latest_args if latest_args != -1 else None)
	latest_begin = current_text.rfind(begin_tag, 0, latest_name if latest_name != -1 else None)
	if latest_begin != -1 and latest_name != -1 and latest_args != -1 and latest_begin < latest_name < latest_args:
	tool_id = current_text[latest_begin + len(begin_tag):latest_name]
	func_name = current_text[latest_name + len(name_tag):latest_args]
	# Index equals number of args tags seen before this delta
	index = previous_text.count(args_tag)
	tool_call_deltas.append(
	DeltaToolCall(
	id=tool_id,
	type="function",
	index=index,
	function=DeltaFunctionCall(name=func_name, arguments=""),
	)
	)

	# If we are inside args (after last args tag without end), stream arg chunk
	begin_tag = "<\|tool_call:begin\|>"
	args_tag = "<\|tool_call:args\|>"
	end_tag = "<\|tool_call:end\|>"
	last_args_pos = current_text.rfind(args_tag)
	last_end_pos = current_text.rfind(end_tag)
	if last_args_pos != -1 and (last_end_pos == -1 or last_args_pos > last_end_pos):
	# Currently within args for the latest tool call
	# Determine previous args text and current args text to compute delta
	prev_last_args = previous_text.rfind(args_tag)
	prev_last_end = previous_text.rfind(end_tag)
	if prev_last_args != -1 and (prev_last_end == -1 or prev_last_args > prev_last_end):
	# Already inside args previously: emit only the delta_text
	if delta_text and delta_text not in (begin_tag, args_tag, end_tag):
	# Stream into the most recently started (but not yet ended) call
	index = max(previous_text.count(args_tag) - 1, 0)
	tool_call_deltas.append(
	DeltaToolCall(
	id=None,
	type=None,
	index=index,
	function=DeltaFunctionCall(name=None, arguments=delta_text),
	)
	)

	if not tool_call_deltas:
	return None

	return DeltaMessage(content=None, tool_calls=tool_call_deltas)

	# --------------------
	# Internal helpers
	# --------------------
	def _parse_text(self, text: str) -> Tuple[Optional[str], List[ToolCall]]:
	"""Parse the completed segments from the given text.

	Returns (content, tool_calls) where content is extracted as the leading
	text up to the first '<\|flush\|>' or '<\|end\|>' marker, and tool_calls is
	a list of fully parsed tool calls inside '<\|tool_calls\|> ... <\|calls\|>'.
	"""
	content = self._parse_content(text)
	tool_calls = self._parse_tool_calls(text)
	return content, tool_calls

	def _parse_content(self, text: str) -> Optional[str]:
	"""Extract assistant content from the text.

	Rule: take the leading content before the first '<\|flush\|>' or
	'<\|end\|>' marker. If neither marker exists, return None.
	"""
	end_tags = ["<\|flush\|>", "<\|end\|>"]

	# Take leading content before the first end tag
	end_positions = [pos for tag in end_tags if (pos := text.find(tag)) != -1]
	if not end_positions:
	return None
	end = min(end_positions)
	# Trim only the extracted portion; tests expect exact substring
	return text[:end]

	def _parse_tool_call_args(self, text: str) -> str:
	try:
	# Try to parse as JSON
	args = json.loads(text)
	except json.JSONDecodeError:
	try:
	# Try to parse as JSON5
	args = pyjson5.decode(text)
	except pyjson5.Json5DecoderException:
	try:
	# Try to parse as Python literal
	args = ast.literal_eval(text)
	except Exception:
	# Fallback: return the original string
	args = text
	if not isinstance(args, str):
	# Always convert back to JSON string
	args = json.dumps(args)
	return args

	def _parse_tool_calls(self, text: str) -> List[ToolCall]:
	tool_calls: list[ToolCall] = []
	# Parse globally; wrapper '<\|tool_calls\|>' may or may not be present.
	section_start = 0
	# section ends at <\|calls\|> if present, else use end of text
	section_end = text.find("<\|calls\|>")
	if section_end == -1:
	section_end = len(text)
	i = section_start
	while True:
	begin_tag = "<\|tool_call:begin\|>"
	name_tag = "<\|tool_call:name\|>"
	args_tag = "<\|tool_call:args\|>"
	end_tag = "<\|tool_call:end\|>"

	b = text.find(begin_tag, i, section_end)
	if b == -1:
	break
	b += len(begin_tag)
	n = text.find(name_tag, b, section_end)
	if n == -1:
	break
	tool_id = text[b:n]
	n += len(name_tag)
	a = text.find(args_tag, n, section_end)
	if a == -1:
	break
	name = text[n:a]
	a += len(args_tag)
	e = text.find(end_tag, a, section_end)
	if e == -1:
	break
	args = text[a:e]
	tool_calls.append(
	ToolCall(
	id=tool_id,
	function=FunctionCall(name=name, arguments=self._parse_tool_call_args(args)),
	))
	i = e + len(end_tag)

	return tool_calls