Let’s assume you’ve already created your conditioning images and you have the following folder structure:
my_dataset/
├── README.md
└── data/
    ├── captions.jsonl
    ├── conditioning_images
    │   ├── 00001.jpg
    │   └── 00002.jpg
    └── images
        ├── 00001.jpg
        └── 00002.jpg
In this structure, the conditioning_images
folder stores your conditioning images, while the images
folder contains the target images for your ControlNet. The captions.jsonl
file holds the captions linked to these images.
{"image": "images/00001.jpg", "text": "This is the caption of the first image."}
{"image": "images/00002.jpg", "text": "This is the caption of the second image."}
Note
The caption file (or the following metadata file) can also be a csv
file. However, if you opt for CSV, be careful of the value separator, as the text may contain commas, which could lead to parsing issues.
Create Metadata File#
A metadata file is a good way to provide additional information about your dataset. It can include various types of data, such as bounding boxes, categories, text, or in our case, a path to the conditioning image.
Let’s create the metadata.jsonl
file:
import json
from pathlib import Path
def create_metadata(data_dir, output_file=None):
    """Build the metadata file from ``captions.jsonl`` found in *data_dir*.

    Each caption entry is copied and extended with a ``conditioning_image``
    path that points at the file of the same name inside
    ``conditioning_images/``.

    Args:
        data_dir: Directory that contains ``captions.jsonl``.
        output_file: Where to write the metadata. Defaults to
            ``metadata.jsonl`` inside *data_dir*.

    A missing captions file or a malformed JSON line is reported with a
    printed message instead of raising; in that case nothing is written.
    """
    data_dir = Path(data_dir)
    if output_file is None:
        output_file = data_dir / "metadata.jsonl"

    metadata = []
    try:
        # Captions are user-authored text, so decode explicitly as UTF-8
        # rather than relying on the platform default encoding.
        with open(data_dir / "captions.jsonl", "r", encoding="utf-8") as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue
                data = json.loads(line)
                file_name = Path(data["image"]).name
                metadata.append(
                    {
                        "image": data["image"],
                        "conditioning_image": f"conditioning_images/{file_name}",
                        "text": data["text"],
                    }
                )
        # Only write once every caption has been parsed successfully.
        with open(output_file, "w", encoding="utf-8") as f:
            for entry in metadata:
                f.write(json.dumps(entry) + "\n")
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error processing data: {e}")
# Example usage: read my_dataset/data/captions.jsonl and write the metadata
# file next to it. The output path is passed explicitly because
# create_metadata takes it as its second argument.
data_dir = "my_dataset/data"
create_metadata(data_dir, f"{data_dir}/metadata.jsonl")
This will create a metadata.jsonl
containing all the information we need for our ControlNet. Each line in the file corresponds to an image, a conditioning image, and the associated text caption.
{"image": "images/00001.jpg", "conditioning_image": "conditioning_images/00001.jpg", "text": "This is the caption of the first image."}
{"image": "images/00002.jpg", "conditioning_image": "conditioning_images/00002.jpg", "text": "This is the caption of the second image."}
Once you’ve created the metadata.jsonl
file, your file structure should look like this:
my_dataset/
├── README.md
└── data/
    ├── captions.jsonl
    ├── metadata.jsonl
    ├── conditioning_images
    │   ├── 00001.jpg
    │   └── 00002.jpg
    └── images
        ├── 00001.jpg
        └── 00002.jpg
Create a Loading Script#
Finally, we must create a loading script that handles all the data in the metadata.jsonl
file. The script should be located in the same directory as the dataset and should have the same name.
Your directory structure should look like this:
my_dataset/
├── README.md
├── my_dataset.py
└── data/
    ├── captions.jsonl
    ├── metadata.jsonl
    ├── conditioning_images
    │   ├── 00001.jpg
    │   └── 00002.jpg
    └── images
        ├── 00001.jpg
        └── 00002.jpg
For the script, we need to implement a class that inherits from GeneratorBasedBuilder
and contains these three methods:
- _info stores information about your dataset.
- _split_generators defines the splits.
- _generate_examples generates the images and labels for each split.
import datasets
class MyDataset(datasets.GeneratorBasedBuilder):
    """Skeleton of the loading-script builder; the methods are filled in below."""

    def _info(self):
        """Store information about the dataset."""
        raise NotImplementedError

    def _split_generators(self, dl_manager):
        """Define the splits."""
        raise NotImplementedError

    def _generate_examples(self, metadata_path, images_dir, conditioning_images_dir):
        """Generate the images and labels for each split."""
        raise NotImplementedError
Adding Dataset Metadata#
There are many options for specifying information about your dataset, but the most important ones are:
- features: specifies the dataset column types.
- supervised_keys: specifies the input features.
# Global variables used by the dataset builder.
# The four "TODO" strings are placeholders: they are informational only and
# do not affect loading.
_DESCRIPTION = "TODO"
_HOMEPAGE = "TODO"
_LICENSE = "TODO"
_CITATION = "TODO"
# Column types of the dataset: two image columns and one string column.
_FEATURES = datasets.Features(
    {
        "image": datasets.Image(),  # target image
        "conditioning_image": datasets.Image(),  # conditioning image for the ControlNet
        "text": datasets.Value("string"),  # caption
    },
)
As you can see above, I’ve set some variables to “TODO”. These options are for informational purposes only and do not affect loading.
def _info(self):
    """Return the ``DatasetInfo`` built from the module-level constants.

    ``supervised_keys`` names the ``conditioning_image`` and ``text``
    columns; the remaining fields are informational.
    """
    info_fields = dict(
        description=_DESCRIPTION,
        features=_FEATURES,
        supervised_keys=("conditioning_image", "text"),
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
    return datasets.DatasetInfo(**info_fields)
Define the Dataset Splits#
dl_manager
is used to download a dataset from a Huggingface repo but here we use it to get the data directory path which is passed in the load_dataset
function.
Here we define the local paths to our data:
- metadata_path: path of the metadata.jsonl file
- images_dir: path to the images
- conditioning_images_dir: path to the conditioning images
Note
If you’ve chosen different names for your folder structure, you may need to adjust the metadata_path, images_dir, and conditioning_images_dir variables.
def _split_generators(self, dl_manager):
    """Declare a single TRAIN split backed by the local ``data`` folder.

    The dataset root is taken from ``dl_manager._base_path`` and made
    absolute. Image paths stored in the metadata file are relative to
    ``data/``, so both image directories point at that folder.
    """
    data_root = Path(dl_manager._base_path).resolve() / "data"
    # These kwargs will be passed to _generate_examples
    train_kwargs = {
        "metadata_path": str(data_root / "metadata.jsonl"),
        "images_dir": str(data_root),
        "conditioning_images_dir": str(data_root),
    }
    train_split = datasets.SplitGenerator(
        name=datasets.Split.TRAIN,
        gen_kwargs=train_kwargs,
    )
    return [train_split]
The last method loads the metadata.jsonl
file and generates the image and its associated conditioning image and text.
@staticmethod
def load_jsonl(path):
    """Lazily parse a JSON Lines file, yielding one decoded object per line."""
    with open(path, "r") as jsonl_file:
        yield from map(json.loads, jsonl_file)
def _generate_examples(self, metadata_path, images_dir, conditioning_images_dir):
    """Yield one ``(key, example)`` pair per row of the metadata file.

    Args:
        metadata_path: Path to the ``metadata.jsonl`` file.
        images_dir: Directory that the ``image`` paths are relative to.
        conditioning_images_dir: Directory that the ``conditioning_image``
            paths are relative to.

    Yields:
        The row's ``image`` value as the key, and a dict holding the
        caption plus the path and raw bytes of both images.
    """
    for row in self.load_jsonl(metadata_path):
        image_path = os.path.join(images_dir, row["image"])
        conditioning_image_path = os.path.join(
            conditioning_images_dir, row["conditioning_image"]
        )

        # Read through context managers so each handle is closed right away
        # instead of lingering until garbage collection.
        with open(image_path, "rb") as f:
            image = f.read()
        with open(conditioning_image_path, "rb") as f:
            conditioning_image = f.read()

        yield row["image"], {
            "text": row["text"],
            "image": {
                "path": image_path,
                "bytes": image,
            },
            "conditioning_image": {
                "path": conditioning_image_path,
                "bytes": conditioning_image,
            },
        }
Following these steps, you can load a ControlNet dataset from a local path.
from datasets import load_dataset

# with the loading script, we can load the dataset
ds = load_dataset("my_dataset")
# (optional)
# pass trust_remote_code=True to avoid the warning about custom code
# ds = load_dataset("my_dataset", trust_remote_code=True)
Full code for the loading script:
import os
import json
import datasets
from pathlib import Path
# Version attached to the default builder configuration below.
_VERSION = datasets.Version("0.0.2")
# Placeholder metadata: informational only, does not affect loading.
_DESCRIPTION = "TODO"
_HOMEPAGE = "TODO"
_LICENSE = "TODO"
_CITATION = "TODO"
# Column types of the dataset: two image columns and one string column.
_FEATURES = datasets.Features(
    {
        "image": datasets.Image(),  # target image
        "conditioning_image": datasets.Image(),  # conditioning image for the ControlNet
        "text": datasets.Value("string"),  # caption
    },
)
# The single configuration exposed by the builder, named "default".
_DEFAULT_CONFIG = datasets.BuilderConfig(name="default", version=_VERSION)
class MyDataset(datasets.GeneratorBasedBuilder):
    """Loading script for a local ControlNet dataset.

    Each example consists of a target image, its conditioning image and a
    text caption, as listed in ``data/metadata.jsonl``.
    """

    BUILDER_CONFIGS = [_DEFAULT_CONFIG]
    DEFAULT_CONFIG_NAME = "default"

    def _info(self):
        """Return the dataset description, column types and supervised keys."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=_FEATURES,
            supervised_keys=("conditioning_image", "text"),
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Declare a single TRAIN split backed by the local ``data`` folder.

        Paths are made absolute and passed on as plain strings. Image paths
        stored in the metadata file are relative to ``data/``, so both image
        directories point at that folder.
        """
        # NOTE(review): _base_path is a private attribute of the download
        # manager — confirm it is available in the installed datasets version.
        base_path = Path(dl_manager._base_path).resolve()
        metadata_path = base_path / "data" / "metadata.jsonl"
        images_dir = base_path / "data"
        conditioning_images_dir = base_path / "data"
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                # These kwargs will be passed to _generate_examples
                gen_kwargs={
                    "metadata_path": str(metadata_path),
                    "images_dir": str(images_dir),
                    "conditioning_images_dir": str(conditioning_images_dir),
                },
            ),
        ]

    @staticmethod
    def load_jsonl(path):
        """Generator to load jsonl file."""
        with open(path, "r") as f:
            for line in f:
                yield json.loads(line)

    def _generate_examples(self, metadata_path, images_dir, conditioning_images_dir):
        """Yield one ``(key, example)`` pair per row of the metadata file.

        The key is the row's ``image`` value; the example holds the caption
        plus the path and raw bytes of the image and its conditioning image.
        """
        for row in self.load_jsonl(metadata_path):
            image_path = os.path.join(images_dir, row["image"])
            conditioning_image_path = os.path.join(
                conditioning_images_dir, row["conditioning_image"]
            )

            # Read through context managers so each handle is closed right
            # away instead of lingering until garbage collection.
            with open(image_path, "rb") as f:
                image = f.read()
            with open(conditioning_image_path, "rb") as f:
                conditioning_image = f.read()

            yield row["image"], {
                "text": row["text"],
                "image": {
                    "path": image_path,
                    "bytes": image,
                },
                "conditioning_image": {
                    "path": conditioning_image_path,
                    "bytes": conditioning_image,
                },
            }