Source code for soton_corenlppy.re.logic_lib

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
..
	/////////////////////////////////////////////////////////////////////////
	//
	// (c) Copyright University of Southampton IT Innovation, 2017
	//
	// Copyright in this software belongs to IT Innovation Centre of
	// Gamma House, Enterprise Road, Southampton SO16 7NS, UK.
	//
	// This software may not be used, sold, licensed, transferred, copied
	// or reproduced in whole or in part in any manner or form or in or
	// on any media by any person other than in accordance with the terms
	// of the Licence Agreement supplied with the software, or otherwise
	// without the prior written consent of the copyright owners.
	//
	// This software is distributed WITHOUT ANY WARRANTY, without even the
	// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
	// PURPOSE, except where stated in the Licence Agreement supplied with
	// the software.
	//
	// Created By : Stuart E. Middleton
	// Created Date : 2016/08/12
	// Created for Project: GRAVITATE
	//
	/////////////////////////////////////////////////////////////////////////
	//
	// Dependencies: None
	//
	/////////////////////////////////////////////////////////////////////////

Logical proposition library

"""

import array,sys,codecs,os,re,copy,math
import numpy, nltk.corpus
import soton_corenlppy.re

# TODO provide an alternative function to compute on-the-fly DRT and execute it using DRTParser (see strategy). this allows anaphora resolution and much more complex lexico-syntactic patterns
# TODO compare the results of the two methods directly
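
# a minimal, hypothetical sketch of the on-the-fly DRT idea in the TODO above,
# assuming nltk.sem.drt; the DRS strings are invented examples, not output of
# this library, and this helper is illustrative rather than part of the API
def _sketch_drt_anaphora_example() :
	from nltk.sem.drt import DrtExpression, resolve_anaphora

	dexpr = DrtExpression.fromstring
	drsSent1 = dexpr( r'([x],[statue(x), broken(x)])' )
	drsSent2 = dexpr( r'([y],[PRO(y), repaired(y)])' )

	# concatenating two DRS structures merges their referents and conditions
	drsMerged = ( drsSent1 + drsSent2 ).simplify()

	# resolve_anaphora() replaces PRO(y) with an equality over candidate referents, e.g. (y = [x])
	return resolve_anaphora( drsMerged )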


def compute_logical_proposition_from_relations( list_sent_trees, lex_verb, lex_entity, dict_openie_config = None ) :
	"""
	for each sent extract the component patterns of any relations found, together with their associated lexico-POS patterns

	:param list list_sent_trees: list of nltk.Tree representing the sents in that doc after POS annotation
	:param dict lex_verb: dict of verb types, each with a set of TERMS
	:param dict lex_entity: dict of entity types, each with a set of TERMS
	:param dict dict_openie_config: config object returned from soton_corenlppy.re.openie_lib.get_openie_config()

	:return: dict of logical propositions and a frequency count = { (subject,predicate,object) : freq_in_corpus }
	:rtype: dict
	"""

	if not isinstance( list_sent_trees, list ) :
		raise Exception( 'invalid list_sent_trees' )
	if not isinstance( dict_openie_config, dict ) :
		raise Exception( 'invalid dict_openie_config' )

	dictPropositions = {}

	for treeSent in list_sent_trees :
		for leaf in treeSent :
			if leaf.label() == 'RELATION' :

				# get components for this relation
				listComponents = soton_corenlppy.re.comp_sem_lib.extract_components_from_relation( leaf, dict_openie_config = dict_openie_config )

				# calculate logical proposition
				strPredicate = None
				strSubject = None
				strObject = None

				#dict_openie_config['logger'].info( repr(listComponents) )

				for tupleComponent in listComponents :

					# match verb phrase tokens against the verb lexicon to get the predicate
					if tupleComponent[0] == 'VERB_P' :
						strMatchTarget = ' ' + tupleComponent[1] + ' '
						for strRelType in lex_verb :
							for strRelToken in lex_verb[strRelType] :
								# consider stemming here
								if ' ' + strRelToken + ' ' in strMatchTarget :
									strPredicate = strRelToken
									break

					# match entity tokens against the entity lexicon; entities seen
					# before the predicate become the subject, those after it the object
					if tupleComponent[0] in ['ENTITY','ENTITY_LIST'] :
						strMatchTarget = ' ' + tupleComponent[1] + ' '
						for strEntityType in lex_entity :
							for strEntityToken in lex_entity[strEntityType] :
								# consider stemming here
								if ' ' + strEntityToken + ' ' in strMatchTarget :
									if strPredicate is None :
										strSubject = strEntityType
									else :
										strObject = strEntityType

					#dict_openie_config['logger'].info( repr(tupleComponent) )
					#dict_openie_config['logger'].info( repr((strSubject,strPredicate,strObject)) )

				# count the corpus frequency of each (subject,predicate,object) proposition
				if strPredicate is not None :
					tupleProposition = ( strSubject, strPredicate, strObject )
					if tupleProposition not in dictPropositions :
						dictPropositions[ tupleProposition ] = 1
					else :
						dictPropositions[ tupleProposition ] = dictPropositions[ tupleProposition ] + 1

	return dictPropositions
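
# hypothetical usage sketch for compute_logical_proposition_from_relations();
# the lexicons below are invented examples and list_sent_trees is assumed to
# come from an earlier POS-tagging and relation-annotation step
def _sketch_logical_proposition_usage( list_sent_trees, dict_openie_config ) :
	lexVerb = { 'make' : set( ['made','carved'] ) }
	lexEntity = { 'artifact' : set( ['statue','figurine'] ), 'material' : set( ['limestone'] ) }

	# e.g. { ('artifact','made','material') : 3 } if the token 'made' links two matched entities
	return compute_logical_proposition_from_relations( list_sent_trees, lexVerb, lexEntity, dict_openie_config = dict_openie_config )
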
# OLD
#   for phrase sequence in list_positive
#     list_positive_components = list of main components of phrase sequence created by removing auxiliary verbs, adjectives, adverbs (allow adv negative?)
#   for phrase sequence in list_negative
#     list_negative_components = list of main components of phrase sequence created by removing auxiliary verbs, adjectives, adverbs (allow adv negative?)
#   ??? apply FIM and refine list_positive and list_negative
#   set_positive_components = set( list_positive_components )
#   set_negative_components = set( list_negative_components )
#   for components in set_positive_components
#     support_pos = count( components ) in list_positive_components
#     support_neg = count( components ) in list_negative_components
#     confidence = support_pos / ( support_pos + support_neg )
#     if confidence > threshold
#       dict_final_patterns = { components : { lex_type : confidence } }
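
# runnable sketch of the OLD confidence-filter pseudocode above; the argument
# names, lex type and default threshold are invented placeholders
def _sketch_pattern_confidence( list_positive_components, list_negative_components, str_lex_type, confidence_threshold = 0.5 ) :
	dictFinalPatterns = {}
	for tupleComponents in set( list_positive_components ) :
		nSupportPos = list_positive_components.count( tupleComponents )
		nSupportNeg = list_negative_components.count( tupleComponents )
		nConfidence = 1.0 * nSupportPos / ( nSupportPos + nSupportNeg )
		if nConfidence > confidence_threshold :
			dictFinalPatterns[ tupleComponents ] = { str_lex_type : nConfidence }
	return dictFinalPatterns

# e.g. 3 positive and 1 negative occurrences give confidence 0.75, so the
# pattern is kept: { ('statue','made','limestone') : { 'artifact' : 0.75 } }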