Skip to content

COCO Models

Info

Bases: BaseModel

Source code in geococo\coco_models.py
153
154
155
156
157
158
class Info(BaseModel):
    # Dataset-level metadata; stored on CocoDataset as its `info` field.

    # Version string of the dataset. Defaults to str(Version(major=0)).
    # CocoDataset.bump_version() parses this value and writes the bumped
    # version back as a string.
    version: str = str(Version(major=0))
    # The remaining fields are optional and default to None.
    # NOTE(review): presumably these mirror the "info" section of the COCO
    # format (year / description / contributor / date_created) -- confirm.
    year: Optional[int] = None
    description: Optional[str] = None
    contributor: Optional[str] = None
    date_created: Optional[datetime] = None

Image

Bases: BaseModel

Source code in geococo\coco_models.py
161
162
163
164
165
166
167
class Image(BaseModel):
    # A single image entry of a CocoDataset (appended via CocoDataset.add_image).
    # All fields are required.

    # Identifier of this image.
    # NOTE(review): presumably the value Annotation.image_id refers to -- confirm.
    id: int
    # Image dimensions.  NOTE(review): presumably in pixels -- confirm.
    width: int
    height: int
    # Path of the image file; CocoDataset.verify_used_dir() inspects its parent
    # directory.
    file_name: pathlib.Path
    # NOTE(review): presumably the Source.id this image was derived from -- confirm.
    source_id: int
    date_captured: datetime

Annotation

Bases: BaseModel

Source code in geococo\coco_models.py
170
171
172
173
174
175
176
177
class Annotation(BaseModel):
    # A single annotation entry of a CocoDataset (appended via
    # CocoDataset.add_annotation). All fields are required.

    # Identifier of this annotation.
    id: int
    # NOTE(review): presumably references Image.id and Category.id -- confirm.
    image_id: int
    category_id: int
    # Segmentation stored as an RleDict (keys "size" and "counts").
    segmentation: RleDict
    area: float
    # NOTE(review): presumably [x, y, width, height] as in the COCO format -- confirm.
    bbox: List[float]
    # NOTE(review): presumably the COCO 0/1 crowd flag -- confirm.
    iscrowd: int

Category

Bases: BaseModel

Source code in geococo\coco_models.py
180
181
182
183
class Category(BaseModel):
    # A category entry of a CocoDataset, created by CocoDataset.add_categories().
    # The property names of this model's schema are used there as the column
    # names of a DataFrame, so field names and order matter to that method.

    # Identifier of the category; used for duplicate detection when ids are given.
    id: int
    # Name of the category; used for duplicate detection when only names are given.
    name: str
    # Name of the parent category; add_categories() fills in "1" when none is given.
    supercategory: str

RleDict

Bases: TypedDict

Source code in geococo\coco_models.py
186
187
188
class RleDict(TypedDict):
    # Typed dictionary used as the type of Annotation.segmentation.
    # NOTE(review): presumably the run-length-encoding dict produced by
    # pycocotools (size = [height, width], counts = encoded mask) -- confirm.
    size: List[int]
    counts: bytes

Source

Bases: BaseModel

Source code in geococo\coco_models.py
191
192
193
194
class Source(BaseModel):
    # A source entry of a CocoDataset, created by CocoDataset.add_source().

    # Identifier of the source; add_source() assigns len(sources) + 1 to new entries.
    id: int
    # Path of the source file; add_source() uses it to detect already-known sources.
    file_name: pathlib.Path
    date_captured: datetime

CocoDataset

Bases: BaseModel

Source code in geococo\coco_models.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
class CocoDataset(BaseModel):
    # Root model of a dataset: metadata plus lists of images, annotations,
    # categories and sources, and three id counters that are excluded from
    # serialisation (Field(exclude=True)).
    #
    # A "#" comment is used instead of a class docstring on purpose: pydantic
    # would otherwise add the docstring to the generated schema as description.
    info: Info
    images: List[Image] = []
    annotations: List[Annotation] = []
    categories: List[Category] = []
    sources: List[Source] = []
    next_image_id: int = Field(default=1, exclude=True)
    next_annotation_id: int = Field(default=1, exclude=True)
    next_source_id: int = Field(default=1, exclude=True)

    @root_validator
    def _set_ids(cls: CocoDataset, values: Dict[str, Any]) -> Dict[str, Any]:
        """Derive the id counters from the number of entries already present.

        Args:
            values: Field values collected by pydantic for this model.

        Returns:
            The same mapping with the three ``next_*_id`` entries overwritten.
        """
        # `.get` instead of indexing: a field that failed its own validation is
        # missing from `values`, and a KeyError raised here would hide the
        # actual validation error from the caller.
        values["next_image_id"] = len(values.get("images", [])) + 1
        values["next_annotation_id"] = len(values.get("annotations", [])) + 1
        # No "+ 1" here: this matches add_source(), which stores the id of the
        # source it just selected in this field (current, not upcoming, id).
        values["next_source_id"] = len(values.get("sources", []))
        return values

    def add_annotation(self, annotation: Annotation) -> None:
        """Append ``annotation`` and advance ``next_annotation_id`` by one."""
        self.annotations.append(annotation)
        self.next_annotation_id += 1

    def add_image(self, image: Image) -> None:
        """Append ``image`` and advance ``next_image_id`` by one."""
        self.images.append(image)
        self.next_image_id += 1

    def add_source(self, source_path: pathlib.Path, date_captured: datetime) -> None:
        """Register a source file, or re-select it if it is already known.

        A source whose ``file_name`` equals ``source_path`` is reused and the
        dataset version receives a patch bump; otherwise a new ``Source`` with
        id ``len(self.sources) + 1`` is appended and the version receives a
        minor bump. In both cases ``next_source_id`` is set to the id of the
        selected source.

        Args:
            source_path: Path identifying the source file.
            date_captured: Capture date stored on a newly created source.
        """
        matches = [known for known in self.sources if known.file_name == source_path]
        if matches:
            # file names are expected to be unique among the known sources
            assert len(matches) == 1
            source = matches[0]
            self.bump_version(bump_method="patch")
        else:
            source = Source(
                id=len(self.sources) + 1,
                file_name=source_path,
                date_captured=date_captured,
            )
            self.sources.append(source)
            self.bump_version(bump_method="minor")

        self.next_source_id = source.id

    def add_categories(
        self,
        category_ids: Optional[Series],
        category_names: Optional[Series],
        super_names: Optional[Series],
    ) -> None:
        """Append a ``Category`` for every given category that is not known yet.

        Duplicates are detected on the ids when ``category_ids`` is given and
        on the names otherwise. Missing attributes are derived: names default
        to the string form of the ids, ids continue after the highest id
        already present in the dataset, and supercategories default to "1".

        Args:
            category_ids: Category ids, one per input row, or None.
            category_names: Category names, one per input row, or None.
            super_names: Supercategory names, one per input row, or None.

        Raises:
            AttributeError: If both ``category_ids`` and ``category_names``
                are None.
            AssertionError: If a given series does not have the same shape as
                the series used for duplicate detection.
        """
        super_default = "1"

        if category_ids is None and category_names is None:
            raise AttributeError("At least one category attribute must be present")

        # Loading all existing Category instances as a single dataframe
        category_pd = pd.DataFrame(
            [category.dict() for category in self.categories],
            columns=Category.schema()["properties"].keys(),
        )

        ids_arr = None if category_ids is None else category_ids.to_numpy()
        names_arr = None if category_names is None else category_names.to_numpy()

        # ids are leading: they are used for duplicate detection whenever given
        if ids_arr is not None:
            uid_array, uid_attribute = ids_arr, "id"
        else:
            uid_array, uid_attribute = names_arr, "name"

        # masking out duplicate values and exiting if all duplicates
        original_shape = uid_array.shape
        _, indices = np.unique(uid_array, return_index=True)
        uid_array = uid_array[indices]
        member_mask = np.isin(uid_array, category_pd[uid_attribute])
        new_members = uid_array[~member_mask]
        if new_members.shape[0] == 0:
            return

        # creating default supercategory names if not given
        if super_names is None:
            supers = np.full(shape=new_members.shape, fill_value=super_default)
        else:
            supers = super_names.to_numpy()
            assert supers.shape == original_shape
            supers = supers[indices][~member_mask]

        if names_arr is None:
            # only ids given: names default to the str version of the ids
            new_ids = new_members
            new_names = new_members.astype(str)
        elif ids_arr is None:
            # only names given: continue numbering after the highest id of ALL
            # existing categories, so new ids can never collide with an
            # existing category (max() is NaN when there are none yet)
            max_id = category_pd["id"].max()
            start = 1 if pd.isna(max_id) else int(max_id) + 1
            new_ids = np.arange(start, start + new_members.size)
            new_names = new_members
        else:
            # both given: names must line up with the ids they belong to
            assert names_arr.shape == original_shape
            new_ids = new_members
            new_names = names_arr[indices][~member_mask]

        # instancing and appending a Category for every new id/name/super triple
        for cid, name, super_name in zip(new_ids, new_names, supers):
            category = Category(id=cid, name=name, supercategory=super_name)
            self.categories.append(category)

    def bump_version(self, bump_method: str) -> None:
        """Bump ``info.version`` by one patch, minor or major step.

        Args:
            bump_method: One of "patch", "minor" or "major".

        Raises:
            ValueError: If ``bump_method`` is not one of the accepted values.
        """
        bump_methods = ["patch", "minor", "major"]
        version = Version.parse(self.info.version)

        if bump_method not in bump_methods:
            raise ValueError(f"bump_method needs to be one of {bump_methods}")
        elif bump_method == bump_methods[0]:
            version = version.bump_patch()
        elif bump_method == bump_methods[1]:
            version = version.bump_minor()
        else:
            version = version.bump_major()

        self.info.version = str(version)

    def verify_used_dir(self, images_dir: pathlib.Path) -> None:
        """Apply a major version bump when ``images_dir`` is a new output dir.

        The parent directories of all registered images are collected; if
        ``images_dir`` is not among them (which includes the case of a dataset
        without images) the version receives a major bump.

        Args:
            images_dir: Directory to check against the registered images.
        """
        output_dirs = {image.file_name.parent for image in self.images}
        if images_dir not in output_dirs:
            self.bump_version(bump_method="major")