14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
class CocoDataset(BaseModel):
    """Pydantic model of a dataset: info, images, annotations, categories, sources.

    Besides the stored collections the model keeps three running counters
    (``next_image_id``, ``next_annotation_id``, ``next_source_id``).  They are
    declared with ``exclude=True`` so they are left out of exported data, and
    they are recomputed from the collection sizes whenever the model is
    validated (see ``_set_ids``).
    """

    info: Info
    images: List[Image] = []
    annotations: List[Annotation] = []
    categories: List[Category] = []
    sources: List[Source] = []
    # Bookkeeping counters, not part of the exported dataset.
    next_image_id: int = Field(default=1, exclude=True)
    next_annotation_id: int = Field(default=1, exclude=True)
    # NOTE: despite its name this holds the id of the source handled by the
    # latest ``add_source`` call (or the number of sources after validation).
    next_source_id: int = Field(default=1, exclude=True)

    @root_validator
    def _set_ids(cls: CocoDataset, values: Dict[str, Any]) -> Dict[str, Any]:
        """Derive the id counters from the sizes of the validated collections.

        A post root validator is still called when a field failed validation,
        and failed fields are absent from ``values``.  ``.get`` is used so that
        a malformed payload surfaces as pydantic's ``ValidationError`` rather
        than as a bare ``KeyError`` raised from here.
        """
        values["next_image_id"] = len(values.get("images", [])) + 1
        values["next_annotation_id"] = len(values.get("annotations", [])) + 1
        # no "+ 1": this counter refers to the last existing source
        values["next_source_id"] = len(values.get("sources", []))
        return values

    def add_annotation(self, annotation: Annotation) -> None:
        """Append ``annotation`` and advance ``next_annotation_id`` by one."""
        self.annotations.append(annotation)
        self.next_annotation_id += 1

    def add_image(self, image: Image) -> None:
        """Append ``image`` and advance ``next_image_id`` by one."""
        self.images.append(image)
        self.next_image_id += 1

    def add_source(self, source_path: pathlib.Path, date_captured: datetime) -> None:
        """Register a source file, or re-select it when it is already known.

        A source whose ``file_name`` equals ``source_path`` is reused and the
        dataset version gets a patch bump.  Otherwise a new ``Source`` with id
        ``len(self.sources) + 1`` is appended and the version gets a minor
        bump.  In both cases ``next_source_id`` is set to that source's id.

        Args:
            source_path: Path identifying the source.
            date_captured: Capture date stored on a newly created source
                (ignored when the source already exists).
        """
        sources = [ssrc for ssrc in self.sources if ssrc.file_name == source_path]
        if sources:
            # internal invariant: a path is registered at most once
            assert len(sources) == 1
            source = sources[0]
            self.bump_version(bump_method="patch")
        else:
            source = Source(
                id=len(self.sources) + 1,
                file_name=source_path,
                date_captured=date_captured,
            )
            self.sources.append(source)
            self.bump_version(bump_method="minor")
        self.next_source_id = source.id

    def add_categories(
        self,
        category_ids: Optional[Series],
        category_names: Optional[Series],
        super_names: Optional[Series],
    ) -> None:
        """Append the categories that are not yet part of the dataset.

        Entries are de-duplicated (first occurrence wins) and compared with
        the existing categories by id when ``category_ids`` is given, else by
        name.  Only unseen entries are appended.

        Args:
            category_ids: Category ids; when omitted, new ids continue after
                the highest id already present in the dataset.
            category_names: Category names; when omitted, the string form of
                each id is used as the name.
            super_names: Supercategory names aligned with the other series;
                when omitted every new category gets supercategory ``"1"``.

        Raises:
            AttributeError: If neither ids nor names are supplied.
            AssertionError: If the supplied series differ in shape.
        """
        super_default = "1"

        names_array = None if category_names is None else category_names.to_numpy()
        ids_array = None if category_ids is None else category_ids.to_numpy()

        # ids are leading: they identify a category whenever they are given
        if ids_array is not None:
            uid_array, uid_attribute = ids_array, "id"
        elif names_array is not None:
            uid_array, uid_attribute = names_array, "name"
        else:
            raise AttributeError("At least one category attribute must be present")

        # loading all existing Category instances as a single dataframe
        category_pd = pd.DataFrame(
            [category.dict() for category in self.categories],
            columns=Category.schema()["properties"].keys(),
        )

        # masking out duplicate values and exiting if nothing new remains
        original_shape = uid_array.shape
        _, indices = np.unique(uid_array, return_index=True)
        uid_array = uid_array[indices]
        member_mask = np.isin(uid_array, category_pd[uid_attribute])
        new_members = uid_array[~member_mask]
        new_shape = new_members.shape
        if new_shape[0] == 0:
            return

        # creating default supercategory names if not given
        if super_names is None:
            supers_array = np.full(shape=new_shape, fill_value=super_default)
        else:
            supers_array = super_names.to_numpy()
            assert supers_array.shape == original_shape
            supers_array = supers_array[indices][~member_mask]

        if names_array is None:
            # only ids given: names default to the str version of the ids
            new_ids = new_members
            new_names = new_members.astype(str)
        elif ids_array is None:
            # only names given: continue after the highest id of ALL existing
            # categories so that fresh ids can never collide with stored ones
            existing_ids = category_pd["id"]
            start = 1 if existing_ids.empty else int(existing_ids.max()) + 1
            new_ids = np.arange(start, start + new_members.size)
            new_names = new_members
        else:
            # both given: names must line up with the ids they belong to
            assert names_array.shape == original_shape
            new_ids = new_members
            new_names = names_array[indices][~member_mask]

        # instancing and appending a Category per new (id, name, super) triple
        for cid, name, supercategory in zip(new_ids, new_names, supers_array):
            category = Category(id=cid, name=name, supercategory=supercategory)
            self.categories.append(category)

    def bump_version(self, bump_method: str) -> None:
        """Bump ``self.info.version`` by one patch, minor or major step.

        Args:
            bump_method: One of ``"patch"``, ``"minor"`` or ``"major"``.

        Raises:
            ValueError: If ``bump_method`` is not one of the supported values.
        """
        bump_methods = ["patch", "minor", "major"]
        # reject bad input before touching the stored version
        if bump_method not in bump_methods:
            raise ValueError(f"bump_method needs to be one of {bump_methods}")
        version = Version.parse(self.info.version)
        bumpers = {
            "patch": version.bump_patch,
            "minor": version.bump_minor,
            "major": version.bump_major,
        }
        self.info.version = str(bumpers[bump_method]())

    def verify_used_dir(self, images_dir: pathlib.Path) -> None:
        """Major-bump the version when no image is stored in ``images_dir``.

        The parent directory of every image's ``file_name`` is collected; if
        ``images_dir`` is not among them (which includes the case of a dataset
        without images) the version receives a major bump.
        """
        output_dirs = {image.file_name.parent for image in self.images}
        if images_dir not in output_dirs:
            self.bump_version(bump_method="major")
|