Skip to content

Agreement


Agreement metrics module for concept maps.

This module provides functionality for computing various agreement metrics between annotators and creating gold standard annotations from multiple annotators' inputs.

Functions:

Name Description
create_gold

Creates gold standard from multiple annotators

createAllComb

Generates all possible concept pairs

createUserRel

Creates user relationship pairs from annotations

check_trans

Checks for transitive relationships

creaCoppieAnnot

Creates annotation pairs and counts agreements

computeK

Computes kappa coefficient

computeFleiss

Computes Fleiss' kappa for multiple raters

computeKappaFleiss

Helper function for Fleiss' kappa computation

checkEachLineCount

Validates rating consistency across lines

checkEachLineCount(mat)

Check that each line has the same number of ratings.

Parameters:

Name Type Description Default
mat list

Matrix of ratings to check

required

Returns:

Type Description
int

Number of ratings per line

Raises:

Type Description
AssertionError

If lines have different rating counts

Source code in apps/annotator/code/metrics/agreement.py
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
def checkEachLineCount(mat):
    """
    Check that every line of the matrix has the same number of ratings.

    The number of ratings of a line is the sum of its entries; the first
    line provides the reference value that all other lines must match.

    Parameters
    ----------
    mat : list
        Matrix of ratings to check (must contain at least one line,
        otherwise indexing the first line raises ``IndexError``)

    Returns
    -------
    int
        Number of ratings per line

    Raises
    ------
    AssertionError
        If lines have different rating counts
    """
    n = sum(mat[0])

    # Raised explicitly instead of through an ``assert`` statement: asserts
    # are removed when Python runs with -O, which would silently disable
    # this consistency check.
    if any(sum(line) != n for line in mat[1:]):
        raise AssertionError("Line count != %d (n value)." % n)

    return n

check_trans(rater, term_pairs_tuple, pair)

Check for transitive relationships in annotations.

Parameters:

Name Type Description Default
rater str

Rater identifier

required
term_pairs_tuple dict

Dictionary of term pairs by rater

required
pair str

Concept pair to check

required

Returns:

Type Description
bool

True if transitive relationship exists

Source code in apps/annotator/code/metrics/agreement.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def check_trans(rater, term_pairs_tuple, pair):
    """
    Check whether a concept pair is connected by a path in a rater's graph.

    A directed graph is built from the rater's entries in
    ``term_pairs_tuple`` and the function reports whether the second
    concept of ``pair`` is reachable from the first one. A direct edge
    counts as a path too.

    Parameters
    ----------
    rater : str
        Rater identifier (key of ``term_pairs_tuple``)
    term_pairs_tuple : dict
        Dictionary of term pairs by rater; each value is handed to
        ``networkx.DiGraph`` as edge data
    pair : str
        Concept pair to check, written as ``"<source>/-/<target>"``

    Returns
    -------
    bool
        True if a path from source to target exists, False otherwise
        (including when either concept is missing from the graph)
    """
    g = networkx.DiGraph(term_pairs_tuple[rater])
    parts = pair.split("/-/")

    # has_path raises when a node is not in the graph, so both endpoints are
    # checked first; the target is only read once the source is known.
    source = parts[0]
    if source not in g:
        return False
    target = parts[1]
    if target not in g:
        return False

    # Always return a real boolean: previously the "both nodes present but
    # no path" case fell through and returned None.
    return bool(networkx.has_path(g, source=source, target=target))

computeFleiss(term_pairs, all_combs)

Compute Fleiss' kappa for multiple raters.

Parameters:

Name Type Description Default
term_pairs dict

Dictionary of term pairs by rater

required
all_combs list

List of all possible combinations

required

Returns:

Type Description
float

Fleiss' kappa coefficient

Source code in apps/annotator/code/metrics/agreement.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def computeFleiss(term_pairs, all_combs):
    """
    Compute Fleiss' kappa for multiple raters.

    For every candidate pair a two-column row ``[zeros, ones]`` is built,
    where ``ones`` is the number of raters whose annotations contain the
    pair and ``zeros`` the number of raters whose annotations do not. The
    resulting matrix is passed to ``computeKappaFleiss``.

    Parameters
    ----------
    term_pairs : dict
        Dictionary of term pairs by rater
    all_combs : list
        List of all possible combinations

    Returns
    -------
    float
        Fleiss' kappa coefficient
    """
    n_raters = len(term_pairs)
    matrix_fleiss = []

    for item in all_combs:
        # One membership test per rater; the row is created once per pair,
        # so it is also defined when there are no raters at all.
        count_one = sum(1 for values in term_pairs.values() if item in values)
        matrix_fleiss.append([n_raters - count_one, count_one])

    return computeKappaFleiss(matrix_fleiss)

computeK(conteggio, pairs)

Compute kappa coefficient for inter-rater agreement.

Parameters:

Name Type Description Default
conteggio dict

Agreement counts dictionary

required
pairs list

List of all possible pairs

required

Returns:

Type Description
float

Kappa coefficient

Source code in apps/annotator/code/metrics/agreement.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
def computeK(conteggio, pairs):
    """
    Compute the kappa coefficient of agreement between two raters.

    Observed agreement is the share of pairs labelled "1,1" or "0,0";
    chance agreement is the sum of the products of the two raters'
    marginal proportions for each label (the formula of Cohen's kappa
    for two raters and two categories).

    Parameters
    ----------
    conteggio : dict
        Agreement counts, keyed by "1,1", "1,0", "0,1" and "0,0"
    pairs : list
        List of all possible pairs (only its length is used)

    Returns
    -------
    float
        Kappa coefficient
    """
    total = float(len(pairs))
    n11 = conteggio["1,1"]
    n10 = conteggio["1,0"]
    n01 = conteggio["0,1"]
    n00 = conteggio["0,0"]

    # Observed agreement.
    observed = (n11 + n00) / total

    # Chance agreement on "1" and on "0", from the marginal proportions.
    chance_on_one = ((n11 + n10) / total) * ((n11 + n01) / total)
    chance_on_zero = ((n01 + n00) / total) * ((n10 + n00) / total)
    expected = chance_on_one + chance_on_zero

    return (observed - expected) / float(1 - expected)

computeKappaFleiss(mat)

Compute Fleiss' kappa from rating matrix.

Parameters:

Name Type Description Default
mat list

Matrix of ratings [subjects][categories]

required

Returns:

Type Description
float

Fleiss' kappa coefficient

Source code in apps/annotator/code/metrics/agreement.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
def computeKappaFleiss(mat):
    """
    Compute Fleiss' kappa from a rating matrix.

    Parameters
    ----------
    mat : list
        Matrix of ratings [subjects][categories]; every line must sum to
        the same number of ratings (checked via ``checkEachLineCount``)

    Returns
    -------
    float
        Fleiss' kappa coefficient
    """
    print(mat)
    n = checkEachLineCount(mat)  # precondition: every line sums to n
    print(n)
    N = len(mat)
    k = len(mat[0])

    # p[j]: share of all assignments that went to category j
    # (agreement on the 0s and agreement on the 1s).
    column_totals = [0.0] * k
    for row in mat:
        for j in range(k):
            column_totals[j] += row[j]
    p = [total / (N * n) for total in column_totals]

    # P[i]: agreement on each single subject (concept pair).
    P = []
    for row in mat:
        squares = 0.0
        for j in range(k):
            squares += row[j] * row[j]
        P.append((squares - n) / (n * (n - 1)))

    # Observed agreement: mean of the per-subject agreements.
    Pbar = sum(P) / N

    # Agreement expected by chance: sum of squared category shares.
    PbarE = 0.0
    for share in p:
        PbarE += share * share

    return (Pbar - PbarE) / (1 - PbarE)

creaCoppieAnnot(rater1, rater2, term_pairs, pairs, term_pairs_tuple)

Create annotation pairs and compute agreement counts.

Parameters:

Name Type Description Default
rater1 str

First rater identifier

required
rater2 str

Second rater identifier

required
term_pairs dict

Dictionary of term pairs by rater

required
pairs list

List of all possible pairs

required
term_pairs_tuple dict

Dictionary of term pair tuples

required

Returns:

Type Description
tuple

(annotation pairs, agreement counts)

Source code in apps/annotator/code/metrics/agreement.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def creaCoppieAnnot(rater1, rater2, term_pairs, pairs, term_pairs_tuple):
    """
    Label every candidate pair for two raters and count the agreements.

    A rater is considered to have annotated a pair when the pair is among
    the rater's term pairs or when ``check_trans`` reports a path for it.
    Each pair is labelled "<rater1>,<rater2>" with 1 for annotated and 0
    for not annotated.

    Parameters
    ----------
    rater1 : str
        First rater identifier
    rater2 : str
        Second rater identifier
    term_pairs : dict
        Dictionary of term pairs by rater
    pairs : list
        List of all possible pairs
    term_pairs_tuple : dict
        Dictionary of term pair tuples

    Returns
    -------
    tuple
        (annotation pairs, agreement counts)
    """
    def has_relation(rater, pair):
        # Direct annotation first; the path lookup only runs when needed.
        return bool(pair in term_pairs[rater]
                    or check_trans(rater, term_pairs_tuple, pair))

    conteggio = {"1,1": 0, "1,0": 0, "0,1": 0, "0,0": 0}
    coppieannot = {}

    for pair in pairs:
        by_first = has_relation(rater1, pair)
        by_second = has_relation(rater2, pair)

        if by_first and by_second:
            coppieannot[pair] = "1,1"
            # Original author's note: inverse pairs are deliberately counted
            # as "both agree" (hence +2), and "0,0" is decremented to
            # compensate for that choice.
            conteggio["1,1"] += 2
            conteggio["0,0"] -= 1
            continue

        if by_first:
            label = "1,0"
        elif by_second:
            label = "0,1"
        else:
            label = "0,0"
        coppieannot[pair] = label
        conteggio[label] += 1

    return coppieannot, conteggio

createAllComb(words)

Create all possible concept pairs from word list.

Parameters:

Name Type Description Default
words list

List of concepts/words

required

Returns:

Type Description
list

All possible unique concept pairs

Source code in apps/annotator/code/metrics/agreement.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def createAllComb(words):
    """
    Create all possible concept pairs from word list.

    Each pair is the string ``"<first>/-/<second>"``. A pair is emitted
    only once regardless of direction: if ``"a/-/b"`` is produced,
    ``"b/-/a"`` is not. Pairs keep the order in which they are first met
    while scanning ``words``.

    Parameters
    ----------
    words : list
        List of concepts/words

    Returns
    -------
    list
        All possible unique concept pairs
    """
    # The set mirrors the result list and gives O(1) membership tests
    # instead of rescanning the growing list for every candidate.
    seen = set()
    all_combs = []
    for first in words:
        for second in words:
            if first == second:
                continue
            combination = first + "/-/" + second
            combination_inv = second + "/-/" + first
            # Checking the pair itself as well as its inverse keeps the
            # result unique even when ``words`` contains repeated entries.
            if combination in seen or combination_inv in seen:
                continue
            seen.add(combination)
            all_combs.append(combination)
    return all_combs

createUserRel(file, all_combs)

Create user relationship pairs from annotations.

Parameters:

Name Type Description Default
file list

List of annotation relationships

required
all_combs list

List of all possible combinations

required

Returns:

Type Description
tuple

(relationships, updated combinations, relationship tuples)

Source code in apps/annotator/code/metrics/agreement.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def createUserRel(file, all_combs):
    """
    Create user relationship pairs from annotations.

    Every annotation is turned into the string
    ``"<prerequisite>/-/<target>"`` and into the tuple
    ``(prerequisite, target)``. Pairs not yet present in ``all_combs`` are
    appended to it, so the list passed in is modified in place.

    Parameters
    ----------
    file : list
        List of annotation relationships; each item is indexed with the
        keys "prerequisite" and "target"
    all_combs : list
        List of all possible combinations (extended in place)

    Returns
    -------
    tuple
        (relationships, updated combinations, relationship tuples)
    """
    # Built once so each annotation costs an O(1) lookup instead of a scan
    # of the whole combination list.
    known = set(all_combs)
    temp = []
    term_pairs_tuple = []

    for annot_pairs in file:
        prerequisite = annot_pairs["prerequisite"]
        target = annot_pairs["target"]
        concept_pair = prerequisite + "/-/" + target

        if concept_pair not in known:
            known.add(concept_pair)
            all_combs.append(concept_pair)

        temp.append(concept_pair)
        term_pairs_tuple.append((prerequisite, target))

    return temp, all_combs, term_pairs_tuple

create_gold(video, annotators, combination_criteria, name)

Create gold standard annotations from multiple annotators.

Parameters:

Name Type Description Default
video str

Video identifier

required
annotators list

List of annotator identifiers

required
combination_criteria str

Criterion for combining annotations (only 'union' is currently supported)

required
name str

Name for the gold standard

required

Returns:

Type Description
None

Stores results in database

Source code in apps/annotator/code/metrics/agreement.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def create_gold(video, annotators, combination_criteria, name):
    """
    Build a gold standard for a video out of several annotators' work.

    With the "union" criterion the concept maps and definitions of all
    annotators are concatenated, converted to JSON-LD and inserted through
    ``mongo.insert_gold``. Any other criterion builds and stores nothing.
    In both cases the collected relations are printed at the end.

    Parameters
    ----------
    video : str
        Video identifier
    annotators : list
        List of annotator identifiers
    combination_criteria : str
        Criteria for combining annotations ('union' supported)
    name : str
        Name for the gold standard

    Returns
    -------
    None
        Stores results in database
    """
    def mergeDictionary(d1, d2):
        # Union of two {key: list} mappings; lists found under a key present
        # in both inputs are combined without duplicates.
        merged = {**d1, **d2}
        for key in merged:
            if key in d1 and key in d2:
                merged[key] = list(set(merged[key] + d1[key]))
        return merged

    relations = []
    definitions = []
    conceptVocabulary = {}

    if combination_criteria != "union":
        # No other criterion is handled: nothing is collected or stored.
        print(relations)
        return

    for annotator in annotators:
        relations.extend(mongo.get_concept_map(annotator, video))
        definitions.extend(mongo.get_definitions(annotator, video))
        # Loading the vocabulary from the database
        # (db_mongo.get_vocabulary(annotator, video)) is switched off, so
        # the vocabulary always starts empty and the merge below is skipped.
        db_conceptVocabulary = None
        if db_conceptVocabulary is not None:
            conceptVocabulary = mergeDictionary(conceptVocabulary, db_conceptVocabulary)

    # A new (empty) vocabulary gets one entry per concept of the first
    # annotator, each with an empty synonym list.
    if not conceptVocabulary:
        for concept in mongo.get_concepts(annotators[0], video):
            conceptVocabulary[concept] = []

    annotations = {"relations": relations, "definitions": definitions, "id": video}
    _, jsonld = annotations_to_jsonLD(annotations, isAutomatic=True)

    data = jsonld.copy()
    data["video_id"] = video
    data["graph_type"] = "gold standard"
    data["gold_name"] = name
    data["conceptVocabulary"] = create_skos_dictionary(conceptVocabulary, video, "auto")

    mongo.insert_gold(data)

    print(relations)