sam2 安装使用笔记


# Prompt the video predictor with a bounding box on one frame, then plot that
# frame with the box and the predicted mask drawn on top.
# NOTE: `predictor`, `inference_state`, `video_dir`, `frame_names`, `show_box`
# and `show_mask` are not defined in this snippet and must already exist.

# Frame we interact with, and the id given to the prompted object
# (any integer works as long as it is unique per object).
ann_frame_idx = 0
ann_obj_id = 4

# Box prompt in (x_min, y_min, x_max, y_max) order = (300, 0, 500, 400).
box = np.array([300, 0, 500, 400], dtype=np.float32)

_, out_obj_ids, out_mask_logits = predictor.add_new_points_or_box(
    inference_state=inference_state,
    frame_idx=ann_frame_idx,
    obj_id=ann_obj_id,
    box=box,
)

# Show the results on the current (interacted) frame.
plt.figure(figsize=(9, 6))
plt.title(f"frame {ann_frame_idx}")
frame_path = os.path.join(video_dir, frame_names[ann_frame_idx])
plt.imshow(Image.open(frame_path))
show_box(box, plt.gca())
# Positive logits are taken as the object's mask.
first_mask = (out_mask_logits[0] > 0.0).cpu().numpy()
show_mask(first_mask, plt.gca(), obj_id=out_obj_ids[0])

# Refine the mask of an object by sending a positive click together with its
# box prompt, then plot the frame with box, click and predicted mask.
# NOTE: `predictor`, `inference_state`, `video_dir`, `frame_names`, `show_box`,
# `show_points` and `show_mask` are not defined in this snippet and must
# already exist.

# Frame we interact with, and the id given to the prompted object
# (any integer works as long as it is unique per object).
ann_frame_idx = 0
ann_obj_id = 4

# One positive click at (x, y) = (460, 60).
points = np.array([[460, 60]], dtype=np.float32)
# Label convention: `1` is a positive click, `0` is a negative click.
labels = np.array([1], np.int32)

# The original box has to be passed again alongside the refinement click
# in the same `add_new_points_or_box` call.
box = np.array([300, 0, 500, 400], dtype=np.float32)

_, out_obj_ids, out_mask_logits = predictor.add_new_points_or_box(
    inference_state=inference_state,
    frame_idx=ann_frame_idx,
    obj_id=ann_obj_id,
    points=points,
    labels=labels,
    box=box,
)

# Show the results on the current (interacted) frame.
plt.figure(figsize=(9, 6))
plt.title(f"frame {ann_frame_idx}")
frame_path = os.path.join(video_dir, frame_names[ann_frame_idx])
plt.imshow(Image.open(frame_path))
show_box(box, plt.gca())
show_points(points, labels, plt.gca())
# Positive logits are taken as the object's mask.
first_mask = (out_mask_logits[0] > 0.0).cpu().numpy()
show_mask(first_mask, plt.gca(), obj_id=out_obj_ids[0])



# Time model loading and a single box+point prediction with either SAM2 or the
# original Segment Anything (SAM1), then save the resulting mask as a
# black-and-white JPEG. Toggle `New_SAM` to pick the model.
import time

import cv2
import hydra
import numpy as np
import torch
from PIL import Image
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# True -> benchmark SAM2, False -> benchmark SAM1.
New_SAM = True

# use bfloat16 for the entire notebook
# (the autocast context is entered and intentionally never exited)
if New_SAM:
    torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

# image = Image.open('/home/taohu/Projects/Data/RGB/thumbnail_Picture1.png')
# image = np.array(image.convert("RGB"))
image_path = '/home/taohu/Projects/Data/RGB/thumbnail_Picture1.png'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread does not raise on a missing/unreadable file; fail early with
    # a clear message instead of an obscure error inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images as BGR; convert to RGB before handing to the predictor.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

if New_SAM:
    method = "SAM2"
else:
    method = "SAM1"

# --- Loading: build the model, wrap it in a predictor and set the image. ---
start_time1 = time.time()
if New_SAM:
    sam2_checkpoint = "models/sam2_hiera_large.pt"
    model_cfg = "sam2_hiera_l.yaml"
    sam2_model = build_sam2(model_cfg, sam2_checkpoint, device="cuda")
    predictor = SAM2ImagePredictor(sam2_model)
    predictor.set_image(image)
else:
    model_type = "vit_h"
    sam_checkpoint = "models/sam_vit_h_4b8939.pth"
    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
    sam.to("cuda")
    predictor = SamPredictor(sam)
    predictor.set_image(image)
end_time1 = time.time()
load_time = end_time1 - start_time1
print(f"Loading time ({method}): {load_time} seconds")

# --- Prompts: one box plus one click labelled 1 inside it. ---
input_box = np.array([58, 107, 213, 281])
input_point = np.array([[104, 163]])
input_label = np.array([1])

# --- Prediction: time a single-mask prediction for the prompts above. ---
start_time2 = time.time()
masks, scores, logits = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    box=input_box,
    multimask_output=False,
)
end_time2 = time.time()
execution_time = end_time2 - start_time2
print(f"Execution time ({method}): {execution_time} seconds")

# --- Output: save the first (only) mask as an image. ---
mask_array = np.array(masks[0])
if New_SAM:
    # SAM2 use 0~1 values for the mask, so scale to 0/255 for saving.
    mask_array = mask_array.astype(np.uint8) * 255
    mask_image = Image.fromarray(mask_array)
    mask_image.save("sam2-bw.jpg")
else:
    mask_image = Image.fromarray(mask_array)
    mask_image.save("sam1-bw.jpg")


