# voc2yolo - ACGIST

import os
import cv2
import xml.etree.ElementTree as ET

def convert(size, box):
    """Rescale a pixel-space box to centre/extent form relative to the image.

    Args:
        size: Image dimensions as ``(width, height)``.
        box: Box edges ordered ``(xmin, xmax, ymin, ymax)``.

    Returns:
        A tuple ``(x, y, w, h)``: the box centre and the box extent, each
        multiplied by the reciprocal of the matching image dimension.
    """
    # Reciprocals of the image dimensions, used as scale factors below.
    scale_x = 1.0 / size[0]
    scale_y = 1.0 / size[1]

    left, right = box[0], box[1]
    top, bottom = box[2], box[3]

    mid_x = (left + right) / 2.0
    mid_y = (top + bottom) / 2.0

    return (
        mid_x * scale_x,
        mid_y * scale_y,
        (right - left) * scale_x,
        (bottom - top) * scale_y,
    )

def _image_size(root, xml_path):
    """Return ``(width, height)`` for the image annotated by *root*.

    The ``<size>`` element is used when it holds positive integer
    ``<width>`` and ``<height>`` values; otherwise the dimensions are read
    from the ``.jpg`` file that sits next to *xml_path*.

    Raises:
        FileNotFoundError: If no usable ``<size>`` is present and the
            ``.jpg`` file cannot be read.
    """
    size = root.find('size')
    if size is not None:
        try:
            w = int(size.findtext('width'))
            h = int(size.findtext('height'))
        except (TypeError, ValueError):
            # Missing or non-integer <width>/<height>: fall back to the image.
            w = h = 0
        # A zero dimension would make convert() divide by zero.
        if w > 0 and h > 0:
            return w, h

    jpg_path = os.path.splitext(xml_path)[0] + '.jpg'
    jpg = cv2.imread(jpg_path)
    if jpg is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            f"Cannot determine image size for {xml_path}: no usable <size> "
            f"element and {jpg_path} could not be read"
        )
    return jpg.shape[1], jpg.shape[0]


def convert_annotation(xml_path, classes):
    """Convert one annotation XML file into a text label file.

    For every ``<object>`` whose ``<name>`` is listed in *classes*, one line
    ``"<class index> <x> <y> <w> <h>"`` is written, where the four numbers
    are the result of ``convert`` applied to the object's ``<bndbox>``.
    The output file has the same path as *xml_path* with its extension
    replaced by ``.txt``.

    Args:
        xml_path: Path of the annotation XML file.
        classes: List of class names to keep; the position of a name in the
            list is the class index written to the label file.

    Raises:
        FileNotFoundError: If a matching object needs the image size, the
            XML has no usable ``<size>`` element, and the sibling ``.jpg``
            cannot be read.
    """
    print(f"Processing {xml_path}")
    root = ET.parse(xml_path).getroot()

    # Swap only the real extension; str.replace would also rewrite '.xml'
    # appearing in a directory name, and would leave the path untouched
    # (overwriting the XML itself) when the suffix differs.
    txt_path = os.path.splitext(xml_path)[0] + '.txt'

    lines = []
    dims = None  # resolved lazily: only needed once a matching object exists
    for obj in root.iter('object'):
        cls = obj.findtext('name')
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        if dims is None:
            dims = _image_size(root, xml_path)
        bndbox = obj.find('bndbox')
        xml_box = (
            float(bndbox.find('xmin').text),
            float(bndbox.find('xmax').text),
            float(bndbox.find('ymin').text),
            float(bndbox.find('ymax').text),
        )
        txt_box = convert(dims, xml_box)
        # Distinct quote styles keep this f-string valid before Python 3.12.
        lines.append(f"{cls_id} {' '.join(str(a) for a in txt_box)}\n")

    # Write only after every object parsed, so a failure above never leaves
    # a truncated label file behind.
    with open(txt_path, 'w') as f:
        f.writelines(lines)

if __name__ == '__main__':
    # Directory scanned for annotation files, and the class names handed to
    # convert_annotation for each of them.
    xml_dir = 'person'
    classes = ['pedestrians']

    # Keep only the XML entries, in the order os.listdir reports them.
    xml_names = [name for name in os.listdir(xml_dir) if name.endswith('.xml')]
    for name in xml_names:
        convert_annotation(os.path.join(xml_dir, name), classes)