Skip to content

Commit

Permalink
version 0.0.2 release
Browse files Browse the repository at this point in the history
  • Loading branch information
codeperfectplus committed Jun 17, 2022
1 parent 01433cb commit 7a527d6
Show file tree
Hide file tree
Showing 7 changed files with 284 additions and 271 deletions.
28 changes: 23 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ Currently, the following formats are supported:

## Installation

### Installation from source code

```bash
git clone https://github.com/codePerfectPlus/dataset-convertor/
```
Expand All @@ -48,6 +50,12 @@ source venv/bin/activate
pip install -r requirements.txt
```

### Installation from PyPI

```bash
pip install dataset-convertor
```

## Usage

Convert annotations from one format to another.
Expand All @@ -60,14 +68,24 @@ dataset formatting example:
- data/yolo5/JPEGImages/*.jpg
- data/yolo5/labels/*.txt

### Pascal VOC(xml) to yolo(txt)

```bash
# example command for pascal-voc(xml) to yolo(txt)
python convert.py --input-folder ./data/pascal_voc \
--output-folder ./output/yolo5 \
--input-format voc \
--output-format yolo
from convert import Convertor

con = Convertor(input_folder='/home/user/data/pascal_voc', output_folder='/home/user/data/yolo5')
con.voc2yolo()
```

### from yolo(txt) to Pascal VOC(xml)

```python
from convert import Convertor
con = Convertor(input_folder='/home/user/data/yolo5', output_folder='/home/user/data/pascal_voc')
con.yolo2voc()
```


## Contributing

Create an issue or PR if a format is missing. Open-source contributions are welcome. Check the [contributing guide](/CONTRIBUTING.md) for details.
Expand Down
41 changes: 0 additions & 41 deletions convert.py

This file was deleted.

1 change: 1 addition & 0 deletions convert/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from convert.utils import Convertor
223 changes: 223 additions & 0 deletions convert/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
import os
import shutil
from glob import glob
from PIL import Image
from pathlib import Path
from datetime import datetime
from xml.etree import ElementTree as ET
import logging

# Root-logger setup, executed once when this module is imported: records at
# INFO level and above are sent both to the file ``debug.log`` and to a
# stream handler, using a day/month/year 12-hour timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%d/%m/%Y %I:%M:%S %p',
    format='%(levelname)s - %(asctime)s - %(name)s - %(message)s',
    handlers=[
        logging.FileHandler('debug.log'),
        logging.StreamHandler(),
    ],
)

class Convertor:
    '''
    Convert an image dataset and its annotations from one format to another.

    The input and the output dataset share the same on-disk layout:

        <folder>/JPEGImages/     image files
        <folder>/Annotations/    annotation files (*.xml for VOC, *.txt for yolo)

    Usage:
        # voc to yolo
        convertor = Convertor('data/VOC2007', 'output/VOC2007_yolo')
        convertor.voc2yolo()
        # yolo to voc
        convertor = Convertor('data/yolo', 'output/VOC2007')
        convertor.yolo2voc()

    Parameters
    ----------
    input_folder : str
        Path to the input folder.
    output_folder : str
        Path to the output folder.
    outout_folder : str, optional
        Misspelled keyword accepted for backward compatibility only; it is
        used when ``output_folder`` is not given.

    Methods
    -------
    voc2yolo()
        Convert Pascal VOC format to Yolo5 format.
    yolo2voc()
        Convert Yolo5 format to Pascal VOC format.
    yolo2coco(), coco2yolo(), voc2coco(), coco2voc(),
    coco2tfrecord(), yolo2tfrecord(), voc2tfrecord()
        Placeholders: not implemented yet, calling them does nothing.

    Formats
    -------
    Pascal VOC - one XML file per image.
    Yolo5      - one TXT file per image, one object per line:
                 ``label x_center y_center width height`` with the four
                 numbers expressed as fractions of the image size.
    COCO       - JSON.
    TFRecord   - TFRecord.
    '''

    def __init__(self, input_folder, output_folder=None, *, outout_folder=None):
        ''' Store the dataset roots and derive the image/annotation folders.

        Raises
        ------
        TypeError
            If no output folder is supplied.
        '''
        # Older releases spelled the parameter "outout_folder"; keep accepting
        # that keyword so existing keyword callers do not break.
        if output_folder is None:
            output_folder = outout_folder
        if output_folder is None:
            raise TypeError("Convertor() missing required argument: 'output_folder'")

        self.input_folder = input_folder
        self.output_folder = output_folder

        self.input_image_folder = os.path.join(self.input_folder, 'JPEGImages')
        self.input_annotation_folder = os.path.join(self.input_folder, 'Annotations')
        self.output_image_folder = os.path.join(self.output_folder, 'JPEGImages')
        self.output_annotation_folder = os.path.join(self.output_folder, 'Annotations')

    def voc2yolo(self):
        ''' Pascal Voc format to yolo txt format conversion.

        For every ``*.xml`` file in the input annotation folder, the image
        named by its ``<filename>`` element is copied to the output image
        folder and a ``.txt`` file with one line per ``<object>`` is written
        to the output annotation folder. An existing ``.txt`` file is
        replaced, so running the conversion twice does not duplicate rows.
        '''
        os.makedirs(self.output_annotation_folder, exist_ok=True)
        os.makedirs(self.output_image_folder, exist_ok=True)

        for xml_file in glob(os.path.join(self.input_annotation_folder, '*.xml')):
            root = ET.parse(xml_file).getroot()

            # The image name must be read from the XML before it can be copied.
            file_name = root.find('filename').text
            size = root.find('size')
            # float() rather than int(): some tools write values such as "48.0".
            width = float(size.find('width').text)
            height = float(size.find('height').text)

            shutil.copy(os.path.join(self.input_image_folder, file_name),
                        os.path.join(self.output_image_folder, file_name))

            rows = []
            for obj in root.iter('object'):
                box = obj.find('bndbox')
                xmin = float(box.find('xmin').text)
                ymin = float(box.find('ymin').text)
                xmax = float(box.find('xmax').text)
                ymax = float(box.find('ymax').text)
                label = obj.find('name').text

                # convert xmin ymin xmax ymax to normalised centre x, centre y, w, h
                x = (xmin + xmax) / 2.0 / width
                y = (ymin + ymax) / 2.0 / height
                w = (xmax - xmin) / width
                h = (ymax - ymin) / height

                # FIXME: convert label to id
                rows.append('{} {} {} {} {}\n'.format(label, x, y, w, h))

            # splitext copes with extensions of any length (".jpeg", ".png", ...).
            stem = os.path.splitext(file_name)[0]
            with open(os.path.join(self.output_annotation_folder, stem + '.txt'), 'w') as f:
                f.writelines(rows)

    def yolo2voc(self):
        ''' yolo txt format to pascal voc xml format.

        For every ``*.txt`` file in the input annotation folder, the ``.jpg``
        image with the same base name is copied to the output image folder
        and one XML file holding an ``<object>`` element per non-empty line
        is written to the output annotation folder.
        '''
        os.makedirs(self.output_annotation_folder, exist_ok=True)
        os.makedirs(self.output_image_folder, exist_ok=True)

        for txt_file in glob(os.path.join(self.input_annotation_folder, '*.txt')):
            stem = os.path.splitext(os.path.basename(txt_file))[0]
            # NOTE: images are looked up with a fixed ".jpg" extension.
            image_file_name = stem + '.jpg'
            image_path = os.path.join(self.input_image_folder, image_file_name)

            # copy image file to output image file directory
            shutil.copy(image_path,
                        os.path.join(self.output_image_folder, image_file_name))

            # The image size is needed once per file; close the handle right away.
            with Image.open(image_path) as img:
                img_width, img_height = img.size

            # create xml file
            # REVIEW: need to REVIEW the xml file formation
            root = ET.Element('annotation')
            ET.SubElement(root, 'folder').text = 'VOC'
            ET.SubElement(root, 'filename').text = image_file_name
            ET.SubElement(root, 'source').text = 'https://github.com/codePerfectPlus/dataset-convertor'
            ET.SubElement(root, 'database').text = 'VOC Format'
            ET.SubElement(root, 'date').text = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            size = ET.SubElement(root, 'size')
            ET.SubElement(size, 'width').text = str(img_width)
            ET.SubElement(size, 'height').text = str(img_height)
            ET.SubElement(size, 'depth').text = '3'

            with open(txt_file, 'r') as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        # tolerate blank lines
                        continue
                    label = fields[0]
                    x, y, w, h = (float(value) for value in fields[1:5])

                    # x, y are the box centre: step half the box size each way,
                    # scale to pixels and keep the corners inside the image.
                    xmin = max(0, int(round((x - w / 2.0) * img_width)))
                    ymin = max(0, int(round((y - h / 2.0) * img_height)))
                    xmax = min(img_width, int(round((x + w / 2.0) * img_width)))
                    ymax = min(img_height, int(round((y + h / 2.0) * img_height)))

                    obj = ET.SubElement(root, 'object')
                    ET.SubElement(obj, 'name').text = label
                    ET.SubElement(obj, 'pose').text = 'Unspecified'
                    ET.SubElement(obj, 'truncated').text = '0'
                    ET.SubElement(obj, 'difficult').text = '0'
                    bndbox = ET.SubElement(obj, 'bndbox')
                    ET.SubElement(bndbox, 'xmin').text = str(xmin)
                    ET.SubElement(bndbox, 'ymin').text = str(ymin)
                    ET.SubElement(bndbox, 'xmax').text = str(xmax)
                    ET.SubElement(bndbox, 'ymax').text = str(ymax)

            # write xml file
            tree = ET.ElementTree(root)
            tree.write(os.path.join(self.output_annotation_folder, stem + '.xml'))

    def yolo2coco(self):
        ''' yolo txt format to coco json format (not implemented yet: does nothing) '''
        pass

    def coco2yolo(self):
        ''' coco json format to yolo txt format (not implemented yet: does nothing) '''
        pass

    def voc2coco(self):
        ''' voc xml format to coco json format (not implemented yet: does nothing) '''
        pass

    def coco2voc(self):
        ''' coco json format to voc xml format (not implemented yet: does nothing) '''
        pass

    def coco2tfrecord(self):
        ''' coco json format to tfrecord format (not implemented yet: does nothing) '''
        pass

    def yolo2tfrecord(self):
        ''' yolo txt format to tfrecord format (not implemented yet: does nothing) '''
        pass

    def voc2tfrecord(self):
        ''' voc xml format to tfrecord format (not implemented yet: does nothing) '''
        pass

37 changes: 37 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import setuptools

# The README doubles as the long description of the distribution. The encoding
# is given explicitly so that building does not depend on the machine's locale.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    # Distribution name (what users pass to ``pip install``).
    name="dataset-convertor",

    # Version of the distribution.
    version="0.0.2",

    # Author name.
    author="CodePerfectPlus",

    # Author contact e-mail.
    author_email="deepak008@live.com",

    # One-line summary of the distribution.
    description="Convert Dataset from one format to another format",

    long_description=long_description,

    # The long description is written in Markdown.
    long_description_content_type="text/markdown",

    # Project home page.
    url="https://github.com/codePerfectPlus/dataset-convertor",
    packages=setuptools.find_packages(),

    # Runtime dependency: convert/utils.py imports ``PIL.Image``, which is
    # provided by the Pillow distribution.
    install_requires=["Pillow"],

    # Trove classifiers describing the supported language, license and OS.
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
Empty file removed src/__init__.py
Empty file.
Loading

0 comments on commit 7a527d6

Please sign in to comment.