Skip to main content

Focus Extraction on Parts of a Thesaurus

Abstract

Focus Extraction on Parts of a Thesaurus

There are situations when we do not want to use all of a thesaurus for annotation. Here is the standard request for extraction:

Request

{{url}}/extractor/api/extract?text=Daiquiri is a family of cocktails whose main ingredients are rum, citrus, and sugar or other sweetener.&projectId={{project}}&language=en&useTypes=true&numberOfTerms=0

It produces the following results:

{
    "concepts": [
        {
            "id": "1E034541-9963-0001-EE48-B5D068201D43:https://nextrelease-cons.semantic-web.at/cocktails/bab016bd-2fd5-4809-bfe7-3225d25f116d@en",
            "project": "1E034541-9963-0001-EE48-B5D068201D43",
            "score": 100,
            "uri": "https://nextrelease-cons.semantic-web.at/cocktails/bab016bd-2fd5-4809-bfe7-3225d25f116d",
            "language": "en",
            "prefLabel": "Daiquiri",
            "altLabels": [
                "Banana daiquiri",
                "Daiquiris",
                "Daquiri",
                "Daikiri",
                "Banana daikiri",
                "Daiquiri (cocktail)",
                "Daquari",
                "Strawberry daiquiri"
            ],
            "conceptSchemes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktails/8d052dfc-44bf-4985-8ce3-4564570a161b",
                    "title": "Cocktails"
                }
            ],
            "customSchemeTypes": [
                {
                    "uri": "http://www.w3.org/2004/02/skos/core#Concept",
                    "label": "Concept"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Cocktail"
                }
            ],
            "frequencyInDocument": 1
        },
        {
            "id": "1E034541-9963-0001-EE48-B5D068201D43:https://nextrelease-cons.semantic-web.at/cocktails/e98ac9a3-64f3-4cc5-8d0d-cf8684b0e7d1@en",
            "project": "1E034541-9963-0001-EE48-B5D068201D43",
            "score": 31,
            "uri": "https://nextrelease-cons.semantic-web.at/cocktails/e98ac9a3-64f3-4cc5-8d0d-cf8684b0e7d1",
            "language": "en",
            "prefLabel": "Rum",
            "altLabels": [
                "Jamaica Spirits",
                "Gunpowder Rum",
                "Caña blanca",
                "Gold rum",
                "Coconut rum",
                "Rum (beverage)",
                "Jamaica Spirit",
                "Spiced Rum",
                "Spiced rum",
                "Cane spirit",
                "Cuban rum",
                "Hard rum"
            ],
            "conceptSchemes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktails/591cf89a-57af-49b8-9042-3fc77408c93e",
                    "title": "Beverages"
                }
            ],
            "customSchemeTypes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Beverages"
                },
                {
                    "uri": "http://www.w3.org/2004/02/skos/core#Concept",
                    "label": "Concept"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Alcoholic-Beverages"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Ingredients"
                }
            ],
            "frequencyInDocument": 1
        },
        {
            "id": "1E034541-9963-0001-EE48-B5D068201D43:https://nextrelease-cons.semantic-web.at/cocktails/3c94f5fc-4664-4177-a880-88643c0be9c1@en",
            "project": "1E034541-9963-0001-EE48-B5D068201D43",
            "score": 19,
            "uri": "https://nextrelease-cons.semantic-web.at/cocktails/3c94f5fc-4664-4177-a880-88643c0be9c1",
            "language": "en",
            "prefLabel": "Sugar",
            "altLabels": [
                "Sugary",
                "-Ose",
                "Shurger",
                "Sugar crops",
                "Cube sugar",
                "Foreign matter in refined sugar",
                "Sugar lumps",
                "Molten sugar",
                "Berry sugar",
                "Refined sugar",
                "White refined sugar",
                "Sugar Crystals",
                "Sugar trade",
                "Suggar",
                "Sweet salt",
                "Lump sugar",
                "Raw Sugar",
                "Sugar buzz",
                "Sugar cube",
                "White sugar",
                "Sugar engineering",
                "Sugars",
                "White Sugar"
            ],
            "conceptSchemes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktails/87384ce8-f20a-4201-b06d-3e0b8d832e5d",
                    "title": "Sweeteners"
                }
            ],
            "customSchemeTypes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Ingredients"
                },
                {
                    "uri": "http://www.w3.org/2004/02/skos/core#Concept",
                    "label": "Concept"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Garnish"
                }
            ],
            "frequencyInDocument": 1
        }
    ]
}

Concept Scheme Filter

One way to filter the concepts that should be considered for annotation is to use concept schemes. With the following call, we restrict the concepts to those that are part of the concept schemes specified in the parameter 'conceptSchemeFilters':

Request

{{url}}/extractor/api/extract?text=Daiquiri is a family of cocktails whose main ingredients are rum, citrus, and sugar or other sweetener.&projectId={{project}}&language=en&useTypes=true&numberOfTerms=0&conceptSchemeFilters=https://nextrelease-cons.semantic-web.at/cocktails/591cf89a-57af-49b8-9042-3fc77408c93e,https://nextrelease-cons.semantic-web.at/cocktails/87384ce8-f20a-4201-b06d-3e0b8d832e5d

Now the results are reduced to two concepts (instead of three before):

{
    "concepts": [
        {
            "id": "1E034541-9963-0001-EE48-B5D068201D43:https://nextrelease-cons.semantic-web.at/cocktails/e98ac9a3-64f3-4cc5-8d0d-cf8684b0e7d1@en",
            "project": "1E034541-9963-0001-EE48-B5D068201D43",
            "score": 100,
            "uri": "https://nextrelease-cons.semantic-web.at/cocktails/e98ac9a3-64f3-4cc5-8d0d-cf8684b0e7d1",
            "language": "en",
            "prefLabel": "Rum",
            "altLabels": [
                "Jamaica Spirits",
                "Gunpowder Rum",
                "Caña blanca",
                "Gold rum",
                "Coconut rum",
                "Rum (beverage)",
                "Jamaica Spirit",
                "Spiced Rum",
                "Spiced rum",
                "Cane spirit",
                "Cuban rum",
                "Hard rum"
            ],
            "conceptSchemes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktails/591cf89a-57af-49b8-9042-3fc77408c93e",
                    "title": "Beverages"
                }
            ],
            "customSchemeTypes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Ingredients"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Alcoholic-Beverages"
                },
                {
                    "uri": "http://www.w3.org/2004/02/skos/core#Concept",
                    "label": "Concept"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Beverages"
                }
            ],
            "frequencyInDocument": 1
        },
        {
            "id": "1E034541-9963-0001-EE48-B5D068201D43:https://nextrelease-cons.semantic-web.at/cocktails/3c94f5fc-4664-4177-a880-88643c0be9c1@en",
            "project": "1E034541-9963-0001-EE48-B5D068201D43",
            "score": 58,
            "uri": "https://nextrelease-cons.semantic-web.at/cocktails/3c94f5fc-4664-4177-a880-88643c0be9c1",
            "language": "en",
            "prefLabel": "Sugar",
            "altLabels": [
                "Sugary",
                "-Ose",
                "Shurger",
                "Sugar crops",
                "Cube sugar",
                "Foreign matter in refined sugar",
                "Sugar lumps",
                "Molten sugar",
                "Berry sugar",
                "Refined sugar",
                "White refined sugar",
                "Sugar Crystals",
                "Sugar trade",
                "Suggar",
                "Sweet salt",
                "Lump sugar",
                "Raw Sugar",
                "Sugar buzz",
                "Sugar cube",
                "White sugar",
                "Sugar engineering",
                "Sugars",
                "White Sugar"
            ],
            "conceptSchemes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktails/87384ce8-f20a-4201-b06d-3e0b8d832e5d",
                    "title": "Sweeteners"
                }
            ],
            "customSchemeTypes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Garnish"
                },
                {
                    "uri": "http://www.w3.org/2004/02/skos/core#Concept",
                    "label": "Concept"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Ingredients"
                }
            ],
            "frequencyInDocument": 1
        }
    ]
}

Alternatively, you can run the extraction by sending the input text and filters in JSON format to the /extractor/api/annotate/text endpoint. See the example below.

Example shell script

#!/bin/sh
# Posts a JSON request file to the /extractor/api/annotate/text endpoint
# and prints the response in the requested MIME type.
#
# Usage: sh <this-script> <request.json>
#   $1  path to the JSON file containing the input text and filters;
#       it is passed to curl as the request body via -d "@<file>".
HOST="https://myserver.poolparty.biz"
MIMETYPE="text/turtle"
# Placeholders: replace with real credentials before running.
# Note: as written, ${YOUR-USER} is a shell default expansion; it yields the
# value of $YOUR if that variable is set, otherwise the literal string "USER"
# (and likewise "PASS" below).
USER="${YOUR-USER}"
PASS="${YOUR-PASS}"
# Without a request file, curl would be asked to read an empty file name.
if [ -z "$1" ]; then
  echo "usage: $0 <request.json>" >&2
  exit 1
fi
echo "RUNNING EXTRACTION ON: $HOST"
echo "ACCEPTING: $MIMETYPE"
# Trailing backslashes keep this a single command; without them the second
# -H option loses its argument and the remainder runs as a separate command.
curl -vvv -u "$USER:$PASS" -X POST \
  -H "content-type:application/json" \
  -H "accept:$MIMETYPE" \
  -d "@$1" "$HOST/extractor/api/annotate/text"

Note

Do not forget to adapt the HOST, USER and PASS variables.

Example input text and filters

{
  "projectId":[ "a490afe0-147c-453f-8977-70f71c71b696" ],
  "language":"en",
  "displayText":"true",
  "text":"Daiquiri is a family of cocktails whose main ingredients are rum, citrus, and sugar or other sweetener",
  "numberOfTerms":0,
  "conceptSchemeFilters":[    
"https://nextrelease-cons.semantic-web.at/cocktails/591cf89a-57af-49b8-9042-3fc77408c93e"
  ],
  "documentUri":"urn:simpleextraction:002"
}

Note

Do not forget to adapt projectId.

Custom Class Filter

Another possibility is to use the custom types assigned to the concepts. With the following call, the results are limited to concepts that are of the type 'Ingredients' using the parameter 'customClassFilters':

Request

{{url}}/extractor/api/extract?text=Daiquiri is a family of cocktails whose main ingredients are rum, citrus, and sugar or other sweetener.&projectId={{project}}&language=en&useTypes=true&numberOfTerms=0&customClassFilters=https://nextrelease-cons.semantic-web.at/cocktail-ontology/Ingredients

The results are the same as before, because both of the detected concepts are of that type, although they appear in different concept schemes:

Click to expand the result:

{
    "concepts": [
        {
            "id": "1E034541-9963-0001-EE48-B5D068201D43:https://nextrelease-cons.semantic-web.at/cocktails/e98ac9a3-64f3-4cc5-8d0d-cf8684b0e7d1@en",
            "project": "1E034541-9963-0001-EE48-B5D068201D43",
            "score": 100,
            "uri": "https://nextrelease-cons.semantic-web.at/cocktails/e98ac9a3-64f3-4cc5-8d0d-cf8684b0e7d1",
            "language": "en",
            "prefLabel": "Rum",
            "altLabels": [
                "Jamaica Spirits",
                "Gunpowder Rum",
                "Caña blanca",
                "Gold rum",
                "Coconut rum",
                "Rum (beverage)",
                "Jamaica Spirit",
                "Spiced Rum",
                "Spiced rum",
                "Cane spirit",
                "Cuban rum",
                "Hard rum"
            ],
            "conceptSchemes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktails/591cf89a-57af-49b8-9042-3fc77408c93e",
                    "title": "Beverages"
                }
            ],
            "customSchemeTypes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Ingredients"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Alcoholic-Beverages"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Beverages"
                },
                {
                    "uri": "http://www.w3.org/2004/02/skos/core#Concept",
                    "label": "Concept"
                }
            ],
            "frequencyInDocument": 1
        },
        {
            "id": "1E034541-9963-0001-EE48-B5D068201D43:https://nextrelease-cons.semantic-web.at/cocktails/3c94f5fc-4664-4177-a880-88643c0be9c1@en",
            "project": "1E034541-9963-0001-EE48-B5D068201D43",
            "score": 58,
            "uri": "https://nextrelease-cons.semantic-web.at/cocktails/3c94f5fc-4664-4177-a880-88643c0be9c1",
            "language": "en",
            "prefLabel": "Sugar",
            "altLabels": [
                "Sugary",
                "-Ose",
                "Shurger",
                "Sugar crops",
                "Cube sugar",
                "Foreign matter in refined sugar",
                "Sugar lumps",
                "Molten sugar",
                "Berry sugar",
                "Refined sugar",
                "White refined sugar",
                "Sugar Crystals",
                "Sugar trade",
                "Suggar",
                "Sweet salt",
                "Lump sugar",
                "Raw Sugar",
                "Sugar buzz",
                "Sugar cube",
                "White sugar",
                "Sugar engineering",
                "Sugars",
                "White Sugar"
            ],
            "conceptSchemes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktails/87384ce8-f20a-4201-b06d-3e0b8d832e5d",
                    "title": "Sweeteners"
                }
            ],
            "customSchemeTypes": [
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Garnish"
                },
                {
                    "uri": "http://www.w3.org/2004/02/skos/core#Concept",
                    "label": "Concept"
                },
                {
                    "uri": "https://nextrelease-cons.semantic-web.at/cocktail-ontology/Ingredients"
                }
            ],
            "frequencyInDocument": 1
        }
    ]
}