DBPedia Natural Language Interface Using Huggingface Transformer

I prototyped a simple natural language question answering demo in about 90 minutes. It accepts a query like “where does Bill Gates work?”, finds the likely URI for the Bill Gates entity, collects comment text for that DBPedia entity via a SPARQL query, and then passes the original query to a transformer question-answering model with the collected comment text as the “context”. I run this on Google Colab; the listing below is my Jupyter Notebook saved as a Python file. Note the use of ! to run shell commands (e.g., !pip install transformers).
# -*- coding: utf-8 -*-
"""DbPedia QA system.ipynb

Automatically generated by Colaboratory.

Original file is located at

**DBPedia Question Answering System**
Copyright 2021 Mark Watson. All rights reserved. License: Apache 2
"""

!pip install transformers
!pip install SPARQLWrapper

from transformers import pipeline

qa = pipeline("question-answering")

!pip install spacy
!python -m spacy download en_core_web_sm

import spacy

nlp_model = spacy.load('en_core_web_sm')

from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("http://dbpedia.org/sparql")

def query(query_string):
  sparql.setQuery(query_string)
  sparql.setReturnFormat(JSON)
  return sparql.query().convert()['results']['bindings']

def entities_in_text(s):
    doc = nlp_model(s)
    ret = {}
    for ename, etype in [(entity.text, entity.label_) for entity in doc.ents]:
        if etype in ret:
            ret[etype] = ret[etype] + [ename]
        else:
            ret[etype] = [ename]
    return ret
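The dictionary-building pattern in entities_in_text can be checked without downloading a spaCy model by feeding it pre-extracted (text, label) pairs. This is a minimal sketch of the same grouping logic (the sample entity pairs are made up for illustration):

```python
# Group entity names by entity type, mirroring the loop in entities_in_text.
def group_entities(pairs):
    ret = {}
    for ename, etype in pairs:
        if etype in ret:
            ret[etype] = ret[etype] + [ename]
        else:
            ret[etype] = [ename]
    return ret

sample = [("Bill Gates", "PERSON"), ("Microsoft", "ORG"),
          ("Melinda Gates", "PERSON")]
print(group_entities(sample))
# {'PERSON': ['Bill Gates', 'Melinda Gates'], 'ORG': ['Microsoft']}
```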

def dbpedia_get_entities_by_name(name, dbpedia_type):
  sparql_query = "select distinct ?s ?comment where {{ ?s <http://www.w3.org/2000/01/rdf-schema#label>  \"{}\"@en . ?s <http://www.w3.org/2000/01/rdf-schema#comment>  ?comment  . FILTER  (lang(?comment) = 'en') . ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> {} . }} limit 15".format(name, dbpedia_type)
  results = query(sparql_query)
  return results
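To see what this function actually sends to the DBPedia endpoint, the query template can be expanded offline with no network access. This is a minimal sketch using the same template string and a sample name:

```python
# Build the same SPARQL query string used above, without sending it.
template = ("select distinct ?s ?comment where {{ "
            "?s <http://www.w3.org/2000/01/rdf-schema#label> \"{}\"@en . "
            "?s <http://www.w3.org/2000/01/rdf-schema#comment> ?comment . "
            "FILTER (lang(?comment) = 'en') . "
            "?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> {} . }} limit 15")

q = template.format("Bill Gates", "<http://dbpedia.org/ontology/Person>")
print(q)
```

Printing the expanded query like this is a convenient way to debug by pasting it into the public DBPedia SPARQL endpoint's web form.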

entity_type_to_type_uri = {'PERSON': '<http://dbpedia.org/ontology/Person>',
    'GPE': '<http://dbpedia.org/ontology/Place>',
    'ORG': '<http://dbpedia.org/ontology/Organisation>'}

def QA(query_text):
  entities = entities_in_text(query_text)

  def helper(entity_type):
    ret = ""
    if entity_type in entities:
      for hname in entities[entity_type]:
        results = dbpedia_get_entities_by_name(hname, entity_type_to_type_uri[entity_type])
        for result in results:
          ret += result['comment']['value'] + " . "
    return ret

  context_text = helper('PERSON') + helper('ORG') + helper('GPE')

  print("Answer from transformer model:")
  print("Original query: ", query_text)

  answer = qa({
                "question": query_text,
                "context": context_text
              })
  print(answer)

QA("where does Bill Gates work?")
QA("where is IBM is headquartered?")
QA("who is Bill Clinton married to?")
The output looks like this:
Answer from transformer model:
Original query:  where does Bill Gates work?
{'score': 0.31679803133010864, 'start': 213,
 'end': 222, 'answer': 'Microsoft'}
Answer from transformer model:
Original query:  where is IBM is headquartered?
{'score': 0.8704459071159363, 'start': 115, 'end': 131,
 'answer': 'Armonk, New York'}
Answer from transformer model:
Original query:  who is Bill Clinton married to?
{'score': 0.00018714569159783423, 'start': 480, 'end': 505,
 'answer': 'former secretary of state'}

