Spaces:
Build error
Build error
| from fastapi import FastAPI, UploadFile, Form | |
| from PIL import Image | |
| from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer | |
| from transformers import Qwen2VLProcessor | |
| from gui_actor.inference import inference | |
| import torch | |
| import io | |
| app = FastAPI() | |
| # Load model + processor at startup | |
| MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL" | |
| processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME) | |
| tokenizer = processor.tokenizer | |
| model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained( | |
| MODEL_NAME, | |
| torch_dtype=torch.float32, | |
| device_map="auto" | |
| ).eval() | |
| def home(): | |
| return {"message": "GUI-Actor Space is running"} | |
| async def predict( | |
| instruction: str = Form(...), | |
| image: UploadFile = Form(...) | |
| ): | |
| # Read and process image | |
| img_bytes = await image.read() | |
| img = Image.open(io.BytesIO(img_bytes)).convert("RGB") | |
| # Auto resize if needed | |
| max_width, max_height = 480, 270 | |
| if img.width > max_width or img.height > max_height: | |
| img.thumbnail((max_width, max_height)) | |
| # Run inference | |
| click_point = inference( | |
| instruction=instruction, | |
| image=img, | |
| model=model, | |
| processor=processor, | |
| tokenizer=tokenizer | |
| ) | |
| return {"click_point": click_point} | |
| if __name__ == "__main__": | |
| import uvicorn | |
| uvicorn.run("app:app", host="0.0.0.0", port=7860) | |