Spaces:

RishitMishra
/

rishit-gui-actor

Build error

rishit-gui-actor / app.py

Update app.py

7cbfd20 verified 5 months ago

1.41 kB

	import io

	import torch
	from fastapi import FastAPI, File, Form, HTTPException, UploadFile
	from gui_actor.inference import inference
	from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
	from PIL import Image
	from transformers import Qwen2VLProcessor

	# ASGI application object; the __main__ guard hands it to uvicorn as "app:app".
	app = FastAPI()

	# Everything below runs once at import time, so the request handlers share
	# a single processor, tokenizer and set of model weights.
	MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"

	processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
	tokenizer = processor.tokenizer

	model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
	    MODEL_NAME,
	    torch_dtype=torch.float32,
	    device_map="auto",
	)
	# Switch to evaluation mode; keep whatever eval() returns bound to `model`.
	model = model.eval()

	@app.get("/")
	def home():
	    # Root endpoint: answers with a fixed status message.
	    status = {"message": "GUI-Actor Space is running"}
	    return status

	@app.post("/predict/")
	async def predict(
	    instruction: str = Form(...),
	    image: UploadFile = File(...),
	):
	    """Run GUI-Actor inference on an uploaded image and instruction.

	    Args:
	        instruction: Instruction text, sent as a multipart form field.
	        image: Image to act on, sent as a multipart file upload.

	    Returns:
	        A JSON object ``{"click_point": ...}`` whose value is whatever
	        ``inference`` returns for the (possibly downscaled) image.

	    Raises:
	        HTTPException: 400 when the upload cannot be decoded as an image.
	    """
	    img_bytes = await image.read()
	    try:
	        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
	    except OSError as exc:
	        # Pillow signals unreadable, empty or truncated image data with
	        # OSError subclasses; report that as a client error rather than
	        # letting it surface as an unhandled 500.
	        raise HTTPException(
	            status_code=400,
	            detail="Uploaded file is not a valid image",
	        ) from exc

	    # Downscale in place to fit within 480x270. thumbnail() keeps the
	    # aspect ratio and never enlarges, so no explicit size check is needed.
	    max_width, max_height = 480, 270
	    img.thumbnail((max_width, max_height))

	    # NOTE(review): inference() is defined outside this file — confirm these
	    # keyword names match its signature. It is also called synchronously
	    # inside an async handler, so other requests wait while it runs.
	    click_point = inference(
	        instruction=instruction,
	        image=img,
	        model=model,
	        processor=processor,
	        tokenizer=tokenizer,
	    )
	    return {"click_point": click_point}

	if __name__ == "__main__":
	    # uvicorn is only needed when this file is executed directly.
	    import uvicorn

	    # Serve the module-level `app` on every interface, port 7860.
	    uvicorn.run(
	        "app:app",
	        host="0.0.0.0",
	        port=7860,
	    )