Skip to content

Synonyms


create_skos_dictionary(synonyms, video_id, mode, language)

Create SKOS dictionary from a dict with synonyms.

Creates an RDF graph with SKOS vocabulary structure from a dictionary of words and their synonyms.

Parameters:

Name Type Description Default
synonyms dict

Dictionary with words as keys and lists of synonyms as values

required
video_id str

Identifier for the video

required
mode str

Mode of operation for the annotator

required
language str

Language code for the labels

required

Returns:

Type Description
dict

JSON-LD representation of the SKOS dictionary

Notes

The output follows the W3C Web Annotation Data Model context and includes custom Edurell namespace references

Source code in apps/annotator/code/text_processor/synonyms.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def create_skos_dictionary(synonyms, video_id:str, mode:str, language:str):
    """
    Build a SKOS vocabulary from a synonym mapping and return it as JSON-LD.

    Every key of ``synonyms`` becomes a ``skos:Concept`` whose preferred
    label is the key itself and whose alternative labels are the synonyms
    associated with that key. The resulting RDF graph is serialized to
    JSON-LD and compacted against the annotation context.

    Parameters
    ----------
    synonyms : dict
        Mapping from a word to the list of its synonyms
    video_id : str
        Identifier of the video, used as last segment of the ``@base`` IRI
    mode : str
        Annotator mode, used as a segment of the ``@base`` IRI
    language : str
        Language tag attached to every generated label

    Returns
    -------
    dict
        Compacted JSON-LD document that always carries a ``@graph`` list

    Notes
    -----
    The context combines the W3C Web Annotation context with the Edurell
    namespace (prefix ``edu``). When compaction yields a single top-level
    node instead of a ``@graph`` list, that node is wrapped into a
    one-element ``@graph``; a node with at most one key is dropped and
    ``@graph`` is left empty.
    """
    print("***** EKEEL - Video Annotation: synonyms.py::create_skos_dictionary(): Inizio ******")

    skos = Namespace('http://www.w3.org/2004/02/skos/core#')
    graph = Graph()
    graph.bind('skos', skos)

    # One skos:Concept per word; its synonyms become alternative labels.
    for concept, alt_labels in synonyms.items():
        concept_ref = URIRef("concept_" + concept.replace(" ", "_"))
        graph.add((concept_ref, RDF['type'], skos['Concept']))
        graph.add((concept_ref, skos['prefLabel'], Literal(concept, lang=language)))
        for alt_label in alt_labels:
            graph.add((concept_ref, skos['altLabel'], Literal(alt_label, lang=language)))

    # Concept IRIs are relative, so they resolve against this per-video base.
    base_iri = "https://edurell.dibris.unige.it/annotator/" + mode + "/" + video_id + "/"
    context = [
        "http://www.w3.org/ns/anno.jsonld",
        {
            "edu": 'https://teldh.github.io/edurell#',
            "@base": base_iri,
            "@version": 1.1,
        },
    ]

    expanded = json.loads(graph.serialize(format='json-ld'))
    jsonld = pyld.jsonld.compact(expanded, context)

    if '@graph' not in jsonld:
        # Move everything except the context into a single node so that
        # callers can always read the concepts from "@graph".
        node = {
            key: jsonld.pop(key)
            for key in list(jsonld)
            if key != "@context"
        }
        jsonld["@graph"] = [node] if len(node) > 1 else []

    print("***** EKEEL - Video Annotation: synonyms.py::create_skos_dictionary(): Fine ******")

    return jsonld

get_synonyms_from_list(concepts)

For each word in a list, find which of the other listed words are its synonyms according to WordNet (accessed through NLTK).

Parameters:

Name Type Description Default
concepts list

List of words to find synonyms for

required

Returns:

Type Description
dict

Dictionary with each input word as key and, as value, the list of other input words that WordNet reports as its synonyms

Notes

Words with spaces are converted to underscore format for WordNet lookup and converted back to spaces in the output

Source code in apps/annotator/code/text_processor/synonyms.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def get_synonyms_from_list(concepts:list):
    """
    Find which of the given words are WordNet synonyms of each other.

    For every word in ``concepts`` the NLTK WordNet synsets are looked up
    and their lemma names collected; the lemmas that are themselves words
    of ``concepts`` are reported as synonyms of that word.

    Parameters
    ----------
    concepts : list
        Words to find synonyms for (any iterable of strings is accepted)

    Returns
    -------
    dict
        Dictionary with every input word as key and, as value, the
        alphabetically sorted list of the other input words found among
        its WordNet lemmas (an empty list when there are none)

    Notes
    -----
    Words with spaces are converted to underscore format for WordNet lookup
    and converted back to spaces in the output
    """
    # WordNet writes multi-word lemmas with underscores instead of spaces.
    keywords = [concept.replace(' ', '_') for concept in concepts]
    keyword_set = set(keywords)

    synonyms = {}
    for keyword in keywords:
        lemmas = {
            lemma.lower()
            for synset in wn.synsets(keyword)
            for lemma in synset.lemma_names()
        }
        # A word is never reported as a synonym of itself.
        lemmas.discard(keyword.lower())

        # NOTE(review): lemmas are lower-cased but keywords are compared as
        # given, so a keyword containing upper-case letters is never matched
        # as a synonym of another keyword - confirm this is intended.
        matches = lemmas & keyword_set

        # Sorted so that the result does not depend on set iteration order.
        synonyms[keyword.replace('_', ' ')] = sorted(
            {match.replace('_', ' ') for match in matches}
        )

    return synonyms