Source code for kitcar_ml.utils.data.import_label_studio_labels

import json
import os
from argparse import ArgumentParser
from io import BytesIO

import boto3
from PIL import Image
from tqdm import tqdm

from kitcar_ml.utils.data.labeled_dataset import LabeledDataset


def download_image_from_s3(s3, bucket: str, filepath: str, output_path: str) -> None:
    """Download an image object from S3 and store it as a local file.

    The object body is read completely into memory, opened with PIL and
    written to ``output_path`` via ``Image.save``. Nothing is returned; the
    image handle is closed when the function exits.

    Args:
        s3: Client object exposing ``get_object(Bucket=..., Key=...)``
            (this module passes a boto3 S3 client).
        bucket: Name of the bucket containing the image.
        filepath: Key of the image object inside the bucket.
        output_path: Local path the image is saved to.
    """
    file_byte_string = s3.get_object(Bucket=bucket, Key=filepath)["Body"].read()

    # Decode from an in-memory buffer so no temporary file is needed.
    with Image.open(BytesIO(file_byte_string)) as image:
        image.save(output_path)


def import_label_studio_labels(
    annotations_dir: str, output_dir: str, force_download: bool = False
):
    """Convert a Label Studio JSON based Dataset to our LabeledDataset Format.

    See: https://doc.kitcar-team.de/kitcar-machine-learning/tutorials/datasets.html

    Every file found (recursively) below ``annotations_dir`` is parsed as one
    JSON annotation. The image referenced by ``task.data.image`` is downloaded
    from S3 into ``output_dir`` and each usable rectangle result is appended to
    the dataset as ``[x1, y1, x2, y2, class_id]`` in pixel coordinates. The
    resulting dataset is written to ``labels.yaml`` inside ``output_dir``.

    Results are skipped (not treated as errors) when they carry no rectangle
    label or when the rounded box is empty or not fully inside the image.

    Args:
        annotations_dir: Directory that is walked for annotation files.
        output_dir: Directory receiving the images and ``labels.yaml``;
            created if it does not exist.
        force_download: Download images even if a file with the same name
            already exists in ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)

    s3 = boto3.client("s3", endpoint_url="https://dvc.kitcar-team.de")

    dataset = LabeledDataset()
    dataset.attributes = ["x1", "y1", "x2", "y2", "class_id"]

    annotation_files = [
        os.path.join(folder, filename)
        for folder, _, filenames in os.walk(annotations_dir)
        for filename in filenames
    ]

    for annotation_file in tqdm(annotation_files, desc="Importing Label Studio Data"):
        # Load annotations from file
        with open(annotation_file, encoding="utf-8") as f:
            annotations = json.load(f)

        # Split "s3://<bucket>/<key>" into bucket and object key
        image_path = annotations["task"]["data"]["image"]
        bucket, file_path = image_path.replace("s3://", "").split("/", 1)
        image_filename = os.path.basename(image_path)
        local_path = os.path.join(output_dir, image_filename)

        # Download image only if it does not exist yet or if the user forces it
        if force_download or not os.path.exists(local_path):
            download_image_from_s3(s3, bucket, file_path, local_path)

        for label in annotations["result"]:
            bbox_values = label["value"]

            # Check for a rectangle label BEFORE reading any geometry: results
            # of other kinds may lack the bounding box keys, and an empty label
            # list has no class to assign.
            if not bbox_values.get("rectanglelabels"):
                continue

            img_width, img_height = label["original_width"], label["original_height"]

            # The code treats x/y/width/height as percentages of the image size
            # and converts them to pixels.
            x1 = bbox_values["x"] / 100 * img_width
            y1 = bbox_values["y"] / 100 * img_height
            x2 = x1 + bbox_values["width"] / 100 * img_width
            y2 = y1 + bbox_values["height"] / 100 * img_height
            x1, y1, x2, y2 = (round(val) for val in (x1, y1, x2, y2))

            # Keep only non-empty boxes that lie completely inside the image
            if not (0 <= x1 < x2 <= img_width and 0 <= y1 < y2 <= img_height):
                continue

            # Reuse the id of an already known class, otherwise register a new one
            class_name = bbox_values["rectanglelabels"][0]
            class_id = next(
                (
                    known_id
                    for known_id, name in dataset.classes.items()
                    if name == class_name
                ),
                None,
            )
            if class_id is None:
                class_id = len(dataset.classes)
                dataset.classes[class_id] = class_name

            # Add Label to dataset
            dataset.append_label(image_filename, [x1, y1, x2, y2, class_id])

    # Save dataset yaml
    dataset.save_as_yaml(os.path.join(output_dir, "labels.yaml"))


if __name__ == "__main__":
    # Command line entry point: parse the options and run the import.
    cli = ArgumentParser()

    # Both directory options are mandatory string arguments.
    for flag, description in (
        ("--annotations-dir", "The dir containing all annotations."),
        ("--output-dir", "The output dir of the whole dataset."),
    ):
        cli.add_argument(flag, type=str, required=True, help=description)

    cli.add_argument(
        "--force-download",
        action="store_true",
        help="Download images from s3 even if they are already in output dir",
    )

    options = cli.parse_args()
    import_label_studio_labels(
        options.annotations_dir,
        options.output_dir,
        options.force_download,
    )