--- wntools.py.orig	2004-07-19 05:09:43 UTC
+++ wntools.py
@@ -33,6 +33,7 @@ __author__ = "Oliver Steele "
 __version__ = "2.0"
 
 from wordnet import *
+from functools import reduce
 #
 # Domain utilities
 #
@@ -41,9 +42,9 @@ from wordnet import *
 def _requireSource(entity):
     if not hasattr(entity, 'pointers'):
         if isinstance(entity, Word):
-            raise TypeError, `entity` + " is not a Sense or Synset. Try " + `entity` + "[0] instead."
+            raise TypeError(repr(entity) + " is not a Sense or Synset. Try " + repr(entity) + "[0] instead.")
         else:
-            raise TypeError, `entity` + " is not a Sense or Synset"
+            raise TypeError(repr(entity) + " is not a Sense or Synset")
 
 def tree(source, pointerType):
     """
@@ -64,9 +65,9 @@ def tree(source, pointerType):
     >>> #pprint(tree(dog, HYPONYM)) # too verbose to include here
     """
     if isinstance(source, Word):
-        return map(lambda s, t=pointerType:tree(s,t), source.getSenses())
+        return list(map(lambda s, t=pointerType:tree(s,t), source.getSenses()))
     _requireSource(source)
-    return [source] + map(lambda s, t=pointerType:tree(s,t), source.pointerTargets(pointerType))
+    return [source] + list(map(lambda s, t=pointerType:tree(s,t), source.pointerTargets(pointerType)))
 
 def closure(source, pointerType, accumulator=None):
     """Return the transitive closure of source under the pointerType
@@ -78,7 +79,7 @@ def closure(source, pointerType, accumulator=None):
     ['dog' in {noun: dog, domestic dog, Canis familiaris}, {noun: canine, canid}, {noun: carnivore}, {noun: placental, placental mammal, eutherian, eutherian mammal}, {noun: mammal}, {noun: vertebrate, craniate}, {noun: chordate}, {noun: animal, animate being, beast, brute, creature, fauna}, {noun: organism, being}, {noun: living thing, animate thing}, {noun: object, physical object}, {noun: entity}]
     """
     if isinstance(source, Word):
-        return reduce(union, map(lambda s, t=pointerType:tree(s,t), source.getSenses()))
+        return reduce(union, list(map(lambda s, t=pointerType:tree(s,t), source.getSenses())))
     _requireSource(source)
     if accumulator is None:
         accumulator = []
@@ -193,7 +194,7 @@ def product(u, v):
     >>> product("123", "abc")
     [('1', 'a'), ('1', 'b'), ('1', 'c'), ('2', 'a'), ('2', 'b'), ('2', 'c'), ('3', 'a'), ('3', 'b'), ('3', 'c')]
     """
-    return flatten1(map(lambda a, v=v:map(lambda b, a=a:(a,b), v), u))
+    return flatten1(list(map(lambda a, v=v:list(map(lambda b, a=a:(a,b), v)), u)))
 
 def removeDuplicates(sequence):
     """Return a copy of _sequence_ with equal items removed.
@@ -242,12 +243,12 @@ def getIndex(form, pos='noun'):
     transformed string until a match is found or all the different
     strings have been tried.
     It returns a Word or None."""
     def trySubstitutions(trySubstitutions, form, substitutions, lookup=1, dictionary=dictionaryFor(pos)):
-        if lookup and dictionary.has_key(form):
+        if lookup and form in dictionary:
             return dictionary[form]
         elif substitutions:
             (old, new) = substitutions[0]
-            substitute = string.replace(form, old, new) and substitute != form
-            if substitute and dictionary.has_key(substitute):
+            substitute = string.replace(form, old, new)
+            if substitute and substitute != form and substitute in dictionary:
                 return dictionary[substitute]
             return trySubstitutions(trySubstitutions, form, substitutions[1:], lookup=0) or \
                    (substitute and trySubstitutions(trySubstitutions, substitute, substitutions[1:]))
@@ -313,7 +314,7 @@ def morphy(form, pos='noun', collect=0):
                     exceptions = binarySearchFile(excfile, form)
                     if exceptions:
                         form = exceptions[string.find(exceptions, ' ')+1:-1]
-                    if lookup and dictionary.has_key(form):
+                    if lookup and form in dictionary:
                         if collect:
                             collection.append(form)
                         else:
--- wordnet.py.orig	2004-07-19 06:11:31 UTC
+++ wordnet.py
@@ -53,9 +53,9 @@ WNHOME = environ.get('WNHOME', {
     'mac': ":",
     'dos': "C:\\wn16",
     'nt': "C:\\Program Files\\WordNet\\2.0"}
-    .get(os.name, "/usr/local/wordnet2.0"))
+    .get(os.name, "/usr/local/share/py-wordnet"))
 
-WNSEARCHDIR = environ.get('WNSEARCHDIR', os.path.join(WNHOME, {'mac': "Database"}.get(os.name, "dict")))
+WNSEARCHDIR = environ.get('WNSEARCHDIR', WNHOME)
 
 ReadableRepresentations = 1
 """If true, repr(word), repr(sense), and repr(synset) return
@@ -210,15 +210,15 @@ class Word:
 
     def __init__(self, line):
         """Initialize the word from a line of a WN POS file."""
-	tokens = string.split(line)
-	ints = map(int, tokens[int(tokens[3]) + 4:])
-	self.form = string.replace(tokens[0], '_', ' ')
+        tokens = string.split(line)
+        ints = list(map(int, tokens[int(tokens[3]) + 4:]))
+        self.form = string.replace(tokens[0], '_', ' ')
         "Orthographic representation of the word."
-	self.pos = _normalizePOS(tokens[1])
+        self.pos = _normalizePOS(tokens[1])
         "Part of speech. One of NOUN, VERB, ADJECTIVE, ADVERB."
-	self.taggedSenseCount = ints[1]
+        self.taggedSenseCount = ints[1]
         "Number of senses that are tagged."
-	self._synsetOffsets = ints[2:ints[0]+2]
+        self._synsetOffsets = ints[2:ints[0]+2]
 
     def getPointers(self, pointerType=None):
         """Pointers connect senses and synsets, not words.
@@ -231,18 +231,18 @@ class Word:
         raise self.getPointers.__doc__
 
    def getSenses(self):
-	"""Return a sequence of senses.
-
-	>>> N['dog'].getSenses()
-	('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron})
-	"""
-	if not hasattr(self, '_senses'):
-	    def getSense(offset, pos=self.pos, form=self.form):
-		return getSynset(pos, offset)[form]
-	    self._senses = tuple(map(getSense, self._synsetOffsets))
-	    del self._synsetOffsets
-	return self._senses
+        """Return a sequence of senses.
+
+        >>> N['dog'].getSenses()
+        ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron})
+        """
+        if not hasattr(self, '_senses'):
+            def getSense(offset, pos=self.pos, form=self.form):
+                return getSynset(pos, offset)[form]
+            self._senses = tuple(map(getSense, self._synsetOffsets))
+            del self._synsetOffsets
+        return self._senses
 
     # Deprecated. Present for backwards compatability.
     def senses(self):
         import wordnet
@@ -253,70 +253,70 @@ class Word:
         return self.getSense()
 
     def isTagged(self):
-	"""Return 1 if any sense is tagged.
-
-	>>> N['dog'].isTagged()
-	1
-	"""
-	return self.taggedSenseCount > 0
+        """Return 1 if any sense is tagged.
+
+        >>> N['dog'].isTagged()
+        1
+        """
+        return self.taggedSenseCount > 0
 
     def getAdjectivePositions(self):
-	"""Return a sequence of adjective positions that this word can
-	appear in. These are elements of ADJECTIVE_POSITIONS.
-
-	>>> ADJ['clear'].getAdjectivePositions()
-	[None, 'predicative']
-	"""
-	positions = {}
-	for sense in self.getSenses():
-	    positions[sense.position] = 1
-	return positions.keys()
+        """Return a sequence of adjective positions that this word can
+        appear in. These are elements of ADJECTIVE_POSITIONS.
+
+        >>> ADJ['clear'].getAdjectivePositions()
+        [None, 'predicative']
+        """
+        positions = {}
+        for sense in self.getSenses():
+            positions[sense.position] = 1
+        return list(positions.keys())
 
     adjectivePositions = getAdjectivePositions # backwards compatability
 
     def __cmp__(self, other):
-	"""
-	>>> N['cat'] < N['dog']
-	1
-	>>> N['dog'] < V['dog']
-	1
-	"""
-	return _compareInstances(self, other, ('pos', 'form'))
+        """
+        >>> N['cat'] < N['dog']
+        1
+        >>> N['dog'] < V['dog']
+        1
+        """
+        return _compareInstances(self, other, ('pos', 'form'))
 
     def __str__(self):
-	"""Return a human-readable representation.
-
-	>>> str(N['dog'])
-	'dog(n.)'
-	"""
-	abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'}
-	return self.form + "(" + abbrs[self.pos] + ")"
+        """Return a human-readable representation.
+
+        >>> str(N['dog'])
+        'dog(n.)'
+        """
+        abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'}
+        return self.form + "(" + abbrs[self.pos] + ")"
 
     def __repr__(self):
-	"""If ReadableRepresentations is true, return a human-readable
-	representation, e.g. 'dog(n.)'.
-
-	If ReadableRepresentations is false, return a machine-readable
-	representation, e.g. "getWord('dog', 'noun')".
-	"""
-	if ReadableRepresentations:
-	    return str(self)
-	return "getWord" + `(self.form, self.pos)`
-
+        """If ReadableRepresentations is true, return a human-readable
+        representation, e.g. 'dog(n.)'.
+
+        If ReadableRepresentations is false, return a machine-readable
+        representation, e.g. "getWord('dog', 'noun')".
+ """ + if ReadableRepresentations: + return str(self) + return "getWord" + repr((self.form, self.pos)) + # # Sequence protocol (a Word's elements are its Senses) # - def __nonzero__(self): - return 1 + def __bool__(self): + return 1 def __len__(self): - return len(self.getSenses()) + return len(self.getSenses()) def __getitem__(self, index): - return self.getSenses()[index] + return self.getSenses()[index] def __getslice__(self, i, j): - return self.getSenses()[i:j] + return self.getSenses()[i:j] class Synset: @@ -354,157 +354,157 @@ class Synset: def __init__(self, pos, offset, line): "Initialize the synset from a line off a WN synset file." - self.pos = pos + self.pos = pos "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB." - self.offset = offset + self.offset = offset """integer offset into the part-of-speech file. Together with pos, this can be used as a unique id.""" - tokens = string.split(line[:string.index(line, '|')]) - self.ssType = tokens[2] - self.gloss = string.strip(line[string.index(line, '|') + 1:]) + tokens = string.split(line[:string.index(line, '|')]) + self.ssType = tokens[2] + self.gloss = string.strip(line[string.index(line, '|') + 1:]) self.lexname = Lexname.lexnames[int(tokens[1])] - (self._senseTuples, remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16)) - (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0])) - if pos == VERB: - (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0])) - def extractVerbFrames(index, vfTuples): - return tuple(map(lambda t:string.atoi(t[1]), filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples))) - senseVerbFrames = [] - for index in range(1, len(self._senseTuples) + 1): - senseVerbFrames.append(extractVerbFrames(index, vfTuples)) - self._senseVerbFrames = senseVerbFrames - self.verbFrames = tuple(extractVerbFrames(None, vfTuples)) + (self._senseTuples, remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16)) + (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0])) + if pos == VERB: + (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0])) + def extractVerbFrames(index, vfTuples): + return tuple([string.atoi(t[1]) for t in list(filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples))]) + senseVerbFrames = [] + for index in range(1, len(self._senseTuples) + 1): + senseVerbFrames.append(extractVerbFrames(index, vfTuples)) + self._senseVerbFrames = senseVerbFrames + self.verbFrames = tuple(extractVerbFrames(None, vfTuples)) """A sequence of integers that index into VERB_FRAME_STRINGS. These list the verb frames that any Sense in this synset participates in. (See also Sense.verbFrames.) Defined only for verbs.""" def getSenses(self): - """Return a sequence of Senses. - - >>> N['dog'][0].getSenses() - ('dog' in {noun: dog, domestic dog, Canis familiaris},) - """ - if not hasattr(self, '_senses'): - def loadSense(senseTuple, verbFrames=None, synset=self): - return Sense(synset, senseTuple, verbFrames) - if self.pos == VERB: - self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames)) - del self._senseVerbFrames - else: - self._senses = tuple(map(loadSense, self._senseTuples)) - del self._senseTuples - return self._senses + """Return a sequence of Senses. 
+
+        >>> N['dog'][0].getSenses()
+        ('dog' in {noun: dog, domestic dog, Canis familiaris},)
+        """
+        if not hasattr(self, '_senses'):
+            def loadSense(senseTuple, verbFrames=None, synset=self):
+                return Sense(synset, senseTuple, verbFrames)
+            if self.pos == VERB:
+                self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames))
+                del self._senseVerbFrames
+            else:
+                self._senses = tuple(map(loadSense, self._senseTuples))
+            del self._senseTuples
+        return self._senses
 
     senses = getSenses
 
     def getPointers(self, pointerType=None):
-	"""Return a sequence of Pointers.
+        """Return a sequence of Pointers.
 
         If pointerType is specified, only pointers of that type are
         returned.  In this case, pointerType should be an element of
         POINTER_TYPES.
-
-	>>> N['dog'][0].getPointers()[:5]
-	(hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
-	>>> N['dog'][0].getPointers(HYPERNYM)
-	(hypernym -> {noun: canine, canid},)
-	"""
-	if not hasattr(self, '_pointers'):
-	    def loadPointer(tuple, synset=self):
-		return Pointer(synset.offset, tuple)
-	    self._pointers = tuple(map(loadPointer, self._pointerTuples))
-	    del self._pointerTuples
-	if pointerType == None:
-	    return self._pointers
-	else:
-	    _requirePointerType(pointerType)
-	    return filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers)
+
+        >>> N['dog'][0].getPointers()[:5]
+        (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
+        >>> N['dog'][0].getPointers(HYPERNYM)
+        (hypernym -> {noun: canine, canid},)
+        """
+        if not hasattr(self, '_pointers'):
+            def loadPointer(tuple, synset=self):
+                return Pointer(synset.offset, tuple)
+            self._pointers = tuple(map(loadPointer, self._pointerTuples))
+            del self._pointerTuples
+        if pointerType == None:
+            return self._pointers
+        else:
+            _requirePointerType(pointerType)
+            return list(filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers))
 
     pointers = getPointers # backwards compatability
 
     def getPointerTargets(self, pointerType=None):
-	"""Return a sequence of Senses or Synsets.
-
+        """Return a sequence of Senses or Synsets.
+
         If pointerType is specified, only targets of pointers of that
         type are returned.  In this case, pointerType should be an
         element of POINTER_TYPES.
-
-	>>> N['dog'][0].getPointerTargets()[:5]
-	[{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
-	>>> N['dog'][0].getPointerTargets(HYPERNYM)
-	[{noun: canine, canid}]
-	"""
-	return map(Pointer.target, self.getPointers(pointerType))
+
+        >>> N['dog'][0].getPointerTargets()[:5]
+        [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
+        >>> N['dog'][0].getPointerTargets(HYPERNYM)
+        [{noun: canine, canid}]
+        """
+        return list(map(Pointer.target, self.getPointers(pointerType)))
 
     pointerTargets = getPointerTargets # backwards compatability
 
     def isTagged(self):
-	"""Return 1 if any sense is tagged.
-
-	>>> N['dog'][0].isTagged()
-	1
-	>>> N['dog'][1].isTagged()
-	0
-	"""
-	return len(filter(Sense.isTagged, self.getSenses())) > 0
+        """Return 1 if any sense is tagged.
+
+        >>> N['dog'][0].isTagged()
+        1
+        >>> N['dog'][1].isTagged()
+        0
+        """
+        return len(list(filter(Sense.isTagged, self.getSenses()))) > 0
 
     def __str__(self):
-	"""Return a human-readable representation.
-
-	>>> str(N['dog'][0].synset)
-	'{noun: dog, domestic dog, Canis familiaris}'
-	"""
-	return "{" + self.pos + ": " + string.joinfields(map(lambda sense:sense.form, self.getSenses()), ", ") + "}"
+        """Return a human-readable representation.
+
+        >>> str(N['dog'][0].synset)
+        '{noun: dog, domestic dog, Canis familiaris}'
+        """
+        return "{" + self.pos + ": " + string.joinfields([sense.form for sense in self.getSenses()], ", ") + "}"
 
     def __repr__(self):
-	"""If ReadableRepresentations is true, return a human-readable
-	representation, e.g. 'dog(n.)'.
-
-	If ReadableRepresentations is false, return a machine-readable
-	representation, e.g. "getSynset(pos, 1234)".
-	"""
-	if ReadableRepresentations:
-	    return str(self)
-	return "getSynset" + `(self.pos, self.offset)`
+        """If ReadableRepresentations is true, return a human-readable
+        representation, e.g. 'dog(n.)'.
+
+        If ReadableRepresentations is false, return a machine-readable
+        representation, e.g. "getSynset(pos, 1234)".
+        """
+        if ReadableRepresentations:
+            return str(self)
+        return "getSynset" + repr((self.pos, self.offset))
 
     def __cmp__(self, other):
-	return _compareInstances(self, other, ('pos', 'offset'))
+        return _compareInstances(self, other, ('pos', 'offset'))
 
 #
 # Sequence protocol (a Synset's elements are its senses).
 #
-    def __nonzero__(self):
-	return 1
+    def __bool__(self):
+        return True
 
     def __len__(self):
-	"""
-	>>> len(N['dog'][0].synset)
-	3
-	"""
-	return len(self.getSenses())
+        """
+        >>> len(N['dog'][0].synset)
+        3
+        """
+        return len(self.getSenses())
 
     def __getitem__(self, idx):
-	"""
-	>>> N['dog'][0].synset[0] == N['dog'][0]
-	1
-	>>> N['dog'][0].synset['dog'] == N['dog'][0]
-	1
-	>>> N['dog'][0].synset[N['dog']] == N['dog'][0]
-	1
-	>>> N['cat'][6]
-	'cat' in {noun: big cat, cat}
-	"""
-	senses = self.getSenses()
-	if isinstance(idx, Word):
-	    idx = idx.form
-	if isinstance(idx, StringType):
-	    idx = _index(idx, map(lambda sense:sense.form, senses)) or \
-		  _index(idx, map(lambda sense:sense.form, senses), _equalsIgnoreCase)
-	return senses[idx]
+        """
+        >>> N['dog'][0].synset[0] == N['dog'][0]
+        1
+        >>> N['dog'][0].synset['dog'] == N['dog'][0]
+        1
+        >>> N['dog'][0].synset[N['dog']] == N['dog'][0]
+        1
+        >>> N['cat'][6]
+        'cat' in {noun: big cat, cat}
+        """
+        senses = self.getSenses()
+        if isinstance(idx, Word):
+            idx = idx.form
+        if isinstance(idx, StringType):
+            idx = _index(idx, [sense.form for sense in senses]) or \
+                  _index(idx, [sense.form for sense in senses], _equalsIgnoreCase)
+        return senses[idx]
 
     def __getslice__(self, i, j):
-	return self.getSenses()[i:j]
+        return self.getSenses()[i:j]
 
 
 class Sense:
@@ -525,7 +525,7 @@ class Sense:
     VERB_FRAME_STRINGS. These list the verb frames that this
     Sense partipates in.  Defined only for verbs.
 
-    >>> decide = V['decide'][0].synset	# first synset for 'decide'
+    >>> decide = V['decide'][0].synset # first synset for 'decide'
     >>> decide[0].verbFrames
     (8, 2, 26, 29)
     >>> decide[1].verbFrames
@@ -536,124 +536,124 @@ class Sense:
     (8, 2)
     """
 
     def __init__(sense, synset, senseTuple, verbFrames=None):
         "Initialize a sense from a synset's senseTuple."
-	# synset is stored by key (pos, synset) rather than object
-	# reference, to avoid creating a circular reference between
-	# Senses and Synsets that will prevent the vm from
-	# garbage-collecting them.
-	sense.pos = synset.pos
+        # synset is stored by key (pos, synset) rather than object
+        # reference, to avoid creating a circular reference between
+        # Senses and Synsets that will prevent the vm from
+        # garbage-collecting them.
+        sense.pos = synset.pos
         "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"
-	sense.synsetOffset = synset.offset
+        sense.synsetOffset = synset.offset
         "synset key.  This is used to retrieve the sense."
-	sense.verbFrames = verbFrames
+        sense.verbFrames = verbFrames
         """A sequence of integers that index into
         VERB_FRAME_STRINGS. These list the verb frames that this
        Sense partipates in.  Defined only for verbs."""
-	(form, idString) = senseTuple
-	sense.position = None
-	if '(' in form:
-	    index = string.index(form, '(')
-	    key = form[index + 1:-1]
-	    form = form[:index]
-	    if key == 'a':
-		sense.position = ATTRIBUTIVE
-	    elif key == 'p':
-		sense.position = PREDICATIVE
-	    elif key == 'ip':
-		sense.position = IMMEDIATE_POSTNOMINAL
-	    else:
-		raise "unknown attribute " + key
-	sense.form = string.replace(form, '_', ' ')
+        (form, idString) = senseTuple
+        sense.position = None
+        if '(' in form:
+            index = string.index(form, '(')
+            key = form[index + 1:-1]
+            form = form[:index]
+            if key == 'a':
+                sense.position = ATTRIBUTIVE
+            elif key == 'p':
+                sense.position = PREDICATIVE
+            elif key == 'ip':
+                sense.position = IMMEDIATE_POSTNOMINAL
+            else:
+                raise ValueError("unknown attribute " + key)
+        sense.form = string.replace(form, '_', ' ')
         "orthographic representation of the Word this is a Sense of."
 
     def __getattr__(self, name):
-	# see the note at __init__ about why 'synset' is provided as a
-	# 'virtual' slot
-	if name == 'synset':
-	    return getSynset(self.pos, self.synsetOffset)
+        # see the note at __init__ about why 'synset' is provided as a
+        # 'virtual' slot
+        if name == 'synset':
+            return getSynset(self.pos, self.synsetOffset)
         elif name == 'lexname':
             return self.synset.lexname
-	else:
-	    raise AttributeError, name
+        else:
+            raise AttributeError(name)
 
     def __str__(self):
-	"""Return a human-readable representation.
-
-	>>> str(N['dog'])
-	'dog(n.)'
-	"""
-	return `self.form` + " in " + str(self.synset)
+        """Return a human-readable representation.
+
+        >>> str(N['dog'])
+        'dog(n.)'
+        """
+        return repr(self.form) + " in " + str(self.synset)
 
     def __repr__(self):
-	"""If ReadableRepresentations is true, return a human-readable
-	representation, e.g. 'dog(n.)'.
-
-	If ReadableRepresentations is false, return a machine-readable
-	representation, e.g. "getWord('dog', 'noun')".
-	"""
-	if ReadableRepresentations:
-	    return str(self)
-	return "%s[%s]" % (`self.synset`, `self.form`)
+        """If ReadableRepresentations is true, return a human-readable
+        representation, e.g. 'dog(n.)'.
+
+        If ReadableRepresentations is false, return a machine-readable
+        representation, e.g. "getWord('dog', 'noun')".
+        """
+        if ReadableRepresentations:
+            return str(self)
+        return "%s[%s]" % (repr(self.synset), repr(self.form))
 
     def getPointers(self, pointerType=None):
-	"""Return a sequence of Pointers.
-
+        """Return a sequence of Pointers.
+
         If pointerType is specified, only pointers of that type are
         returned.  In this case, pointerType should be an element of
         POINTER_TYPES.
-
-	>>> N['dog'][0].getPointers()[:5]
-	(hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
-	>>> N['dog'][0].getPointers(HYPERNYM)
-	(hypernym -> {noun: canine, canid},)
-	"""
-	senseIndex = _index(self, self.synset.getSenses())
-	def pointsFromThisSense(pointer, selfIndex=senseIndex):
-	    return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex
-	return filter(pointsFromThisSense, self.synset.getPointers(pointerType))
+
+        >>> N['dog'][0].getPointers()[:5]
+        (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt})
+        >>> N['dog'][0].getPointers(HYPERNYM)
+        (hypernym -> {noun: canine, canid},)
+        """
+        senseIndex = _index(self, self.synset.getSenses())
+        def pointsFromThisSense(pointer, selfIndex=senseIndex):
+            return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex
+        return list(filter(pointsFromThisSense, self.synset.getPointers(pointerType)))
 
     pointers = getPointers # backwards compatability
 
     def getPointerTargets(self, pointerType=None):
-	"""Return a sequence of Senses or Synsets.
-
+        """Return a sequence of Senses or Synsets.
+
         If pointerType is specified, only targets of pointers of that
         type are returned.  In this case, pointerType should be an
        element of POINTER_TYPES.
-
-	>>> N['dog'][0].getPointerTargets()[:5]
-	[{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
-	>>> N['dog'][0].getPointerTargets(HYPERNYM)
-	[{noun: canine, canid}]
-	"""
-	return map(Pointer.target, self.getPointers(pointerType))
+
+        >>> N['dog'][0].getPointerTargets()[:5]
+        [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}]
+        >>> N['dog'][0].getPointerTargets(HYPERNYM)
+        [{noun: canine, canid}]
+        """
+        return list(map(Pointer.target, self.getPointers(pointerType)))
 
     pointerTargets = getPointerTargets # backwards compatability
 
     def getSenses(self):
-	return self,
+        return self,
 
     senses = getSenses # backwards compatability
 
     def isTagged(self):
-	"""Return 1 if any sense is tagged.
-
-	>>> N['dog'][0].isTagged()
-	1
-	>>> N['dog'][1].isTagged()
-	0
-	"""
-	word = self.word()
-	return _index(self, word.getSenses()) < word.taggedSenseCount
+        """Return 1 if any sense is tagged.
+
+        >>> N['dog'][0].isTagged()
+        1
+        >>> N['dog'][1].isTagged()
+        0
+        """
+        word = self.word()
+        return _index(self, word.getSenses()) < word.taggedSenseCount
 
     def getWord(self):
-	return getWord(self.form, self.pos)
+        return getWord(self.form, self.pos)
 
     word = getWord # backwards compatability
 
     def __cmp__(self, other):
-	def senseIndex(sense, synset=self.synset):
-	    return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form)
-	return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other))
+        def senseIndex(sense, synset=self.synset):
+            return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form)
+        return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other))
 
 
 class Pointer:
@@ -668,21 +668,21 @@ class Pointer:
     """
 
     _POINTER_TYPE_TABLE = {
-	'!': ANTONYM,
+        '!': ANTONYM,
        '@': HYPERNYM,
        '~': HYPONYM,
-	'=': ATTRIBUTE,
+        '=': ATTRIBUTE,
        '^': ALSO_SEE,
        '*': ENTAILMENT,
        '>': CAUSE,
-	'$': VERB_GROUP,
-	'#m': MEMBER_MERONYM,
+        '$': VERB_GROUP,
+        '#m': MEMBER_MERONYM,
        '#s': SUBSTANCE_MERONYM,
        '#p': PART_MERONYM,
-	'%m': MEMBER_HOLONYM,
+        '%m': MEMBER_HOLONYM,
        '%s': SUBSTANCE_HOLONYM,
        '%p': PART_HOLONYM,
-	'&': SIMILAR,
+        '&': SIMILAR,
        '<': PARTICIPLE_OF,
        '\\': PERTAINYM,
        # New in wn 2.0:
@@ -696,51 +696,51 @@ class Pointer:
     }
 
     def __init__(self, sourceOffset, pointerTuple):
-	(type, offset, pos, indices) = pointerTuple
-	self.type = Pointer._POINTER_TYPE_TABLE[type]
+        (type, offset, pos, indices) = pointerTuple
+        self.type = Pointer._POINTER_TYPE_TABLE[type]
         """One of POINTER_TYPES."""
-	self.sourceOffset = sourceOffset
-	self.targetOffset = int(offset)
-	self.pos = _normalizePOS(pos)
+        self.sourceOffset = sourceOffset
+        self.targetOffset = int(offset)
+        self.pos = _normalizePOS(pos)
         """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"""
-	indices = string.atoi(indices, 16)
-	self.sourceIndex = indices >> 8
-	self.targetIndex = indices & 255
+        indices = string.atoi(indices, 16)
+        self.sourceIndex = indices >> 8
+        self.targetIndex = indices & 255
 
     def getSource(self):
-	synset = getSynset(self.pos, self.sourceOffset)
-	if self.sourceIndex:
-	    return synset[self.sourceIndex - 1]
-	else:
-	    return synset
+        synset = getSynset(self.pos, self.sourceOffset)
+        if self.sourceIndex:
+            return synset[self.sourceIndex - 1]
+        else:
+            return synset
 
     source = getSource # backwards compatability
 
     def getTarget(self):
-	synset = getSynset(self.pos, self.targetOffset)
-	if self.targetIndex:
-	    return synset[self.targetIndex - 1]
-	else:
-	    return synset
+        synset = getSynset(self.pos, self.targetOffset)
+        if self.targetIndex:
+            return synset[self.targetIndex - 1]
+        else:
+            return synset
 
     target = getTarget # backwards compatability
 
     def __str__(self):
-	return self.type + " -> " + str(self.target())
+        return self.type + " -> " + str(self.target())
 
     def __repr__(self):
-	if ReadableRepresentations:
-	    return str(self)
-	return "<" + str(self) + ">"
+        if ReadableRepresentations:
+            return str(self)
+        return "<" + str(self) + ">"
 
     def __cmp__(self, other):
-	diff = _compareInstances(self, other, ('pos', 'sourceOffset'))
-	if diff:
-	    return diff
-	synset = self.source()
-	def pointerIndex(sense, synset=synset):
-	    return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex')))
-	return cmp(pointerIndex(self), pointerIndex(other))
+        diff = _compareInstances(self, other, ('pos', 'sourceOffset'))
+        if diff:
+            return diff
+        synset = self.source()
+        def pointerIndex(sense, synset=synset):
+            return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex')))
+        return cmp(pointerIndex(self), pointerIndex(other))
 
 
 # Loading the lexnames
@@ -794,59 +794,59 @@ class Dictionary:
     """
 
     def __init__(self, pos, filenameroot):
-	self.pos = pos
+        self.pos = pos
         """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB"""
-	self.indexFile = _IndexFile(pos, filenameroot)
-	self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE)
+        self.indexFile = _IndexFile(pos, filenameroot)
+        self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE)
 
     def __repr__(self):
-	dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'}
-	if dictionaryVariables.get(self):
-	    return self.__module__ + "." + dictionaryVariables[self]
-	return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos)
+        dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'}
+        if dictionaryVariables.get(self):
+            return self.__module__ + "." + dictionaryVariables[self]
+        return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos)
 
     def getWord(self, form, line=None):
-	key = string.replace(string.lower(form), ' ', '_')
-	pos = self.pos
-	def loader(key=key, line=line, indexFile=self.indexFile):
-	    line = line or indexFile.get(key)
-	    return line and Word(line)
-	word = _entityCache.get((pos, key), loader)
-	if word:
-	    return word
-	else:
-	    raise KeyError, "%s is not in the %s database" % (`form`, `pos`)
+        key = string.replace(string.lower(form), ' ', '_')
+        pos = self.pos
+        def loader(key=key, line=line, indexFile=self.indexFile):
+            line = line or indexFile.get(key)
+            return line and Word(line)
+        word = _entityCache.get((pos, key), loader)
+        if word:
+            return word
+        else:
+            raise KeyError("%s is not in the %s database" % (repr(form), repr(pos)))
 
     def getSynset(self, offset):
-	pos = self.pos
-	def loader(pos=pos, offset=offset, dataFile=self.dataFile):
-	    return Synset(pos, offset, _lineAt(dataFile, offset))
-	return _entityCache.get((pos, offset), loader)
+        pos = self.pos
+        def loader(pos=pos, offset=offset, dataFile=self.dataFile):
+            return Synset(pos, offset, _lineAt(dataFile, offset))
+        return _entityCache.get((pos, offset), loader)
 
     def _buildIndexCacheFile(self):
-	self.indexFile._buildIndexCacheFile()
+        self.indexFile._buildIndexCacheFile()
 
 #
 # Sequence protocol (a Dictionary's items are its Words)
 #
-    def __nonzero__(self):
-	"""Return false.  (This is to avoid scanning the whole index file
-	to compute len when a Dictionary is used in test position.)
-
-	>>> N and 'true'
-	'true'
-	"""
-	return 1
+    def __bool__(self):
+        """Return true.  (Defined explicitly so that testing a Dictionary
+        for truth doesn't scan the whole index file to compute len.)
+
+        >>> N and 'true'
+        'true'
+        """
+        return True
 
     def __len__(self):
-	"""Return the number of index entries.
-
-	>>> len(ADJ)
-	21435
-	"""
-	if not hasattr(self, 'length'):
-	    self.length = len(self.indexFile)
-	return self.length
+        """Return the number of index entries.
+
+        >>> len(ADJ)
+        21435
+        """
+        if not hasattr(self, 'length'):
+            self.length = len(self.indexFile)
+        return self.length
 
     def __getslice__(self, a, b):
         results = []
@@ -860,22 +860,22 @@ class Dictionary:
         return results
 
     def __getitem__(self, index):
-	"""If index is a String, return the Word whose form is
-	index.  If index is an integer n, return the Word
-	indexed by the n'th Word in the Index file.
-
-	>>> N['dog']
-	dog(n.)
-	>>> N[0]
-	'hood(n.)
- """ - if isinstance(index, StringType): - return self.getWord(index) - elif isinstance(index, IntType): - line = self.indexFile[index] - return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line) - else: - raise TypeError, "%s is not a String or Int" % `index` + """If index is a String, return the Word whose form is + index. If index is an integer n, return the Word + indexed by the n'th Word in the Index file. + + >>> N['dog'] + dog(n.) + >>> N[0] + 'hood(n.) + """ + if isinstance(index, StringType): + return self.getWord(index) + elif isinstance(index, IntType): + line = self.indexFile[index] + return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line) + else: + raise TypeError("%s is not a String or Int" % repr(index)) # # Dictionary protocol @@ -884,54 +884,54 @@ class Dictionary: # def get(self, key, default=None): - """Return the Word whose form is _key_, or _default_. - - >>> N.get('dog') - dog(n.) - >>> N.get('inu') - """ - try: - return self[key] - except LookupError: - return default + """Return the Word whose form is _key_, or _default_. + + >>> N.get('dog') + dog(n.) + >>> N.get('inu') + """ + try: + return self[key] + except LookupError: + return default def keys(self): - """Return a sorted list of strings that index words in this - dictionary.""" - return self.indexFile.keys() + """Return a sorted list of strings that index words in this + dictionary.""" + return list(self.indexFile.keys()) def has_key(self, form): - """Return true iff the argument indexes a word in this dictionary. - - >>> N.has_key('dog') - 1 - >>> N.has_key('inu') - 0 - """ - return self.indexFile.has_key(form) + """Return true iff the argument indexes a word in this dictionary. + + >>> N.has_key('dog') + 1 + >>> N.has_key('inu') + 0 + """ + return form in self.indexFile # # Testing # def _testKeys(self): - """Verify that index lookup can find each word in the index file.""" - print "Testing: ", self - file = open(self.indexFile.file.name, _FILE_OPEN_MODE) - counter = 0 - while 1: - line = file.readline() - if line == '': break - if line[0] != ' ': - key = string.replace(line[:string.find(line, ' ')], '_', ' ') - if (counter % 1000) == 0: - print "%s..." % (key,), - import sys - sys.stdout.flush() - counter = counter + 1 - self[key] - file.close() - print "done." + """Verify that index lookup can find each word in the index file.""" + print("Testing: ", self) + file = open(self.indexFile.file.name, _FILE_OPEN_MODE) + counter = 0 + while 1: + line = file.readline() + if line == '': break + if line[0] != ' ': + key = string.replace(line[:string.find(line, ' ')], '_', ' ') + if (counter % 1000) == 0: + print("%s..." 
+                    import sys
+                    sys.stdout.flush()
+                counter = counter + 1
+                self[key]
+        file.close()
+        print("done.")
 
 
 class _IndexFile:
@@ -939,69 +939,66 @@ class _IndexFile:
 
     Sequence and Dictionary interface to a sorted index file."""
 
     def __init__(self, pos, filenameroot):
-	self.pos = pos
-	self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE)
-	self.offsetLineCache = {}   # Table of (pathname, offset) -> (line, nextOffset)
-	self.rewind()
-	self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx")
-	try:
-	    import shelve
-	    self.indexCache = shelve.open(self.shelfname, 'r')
-	except:
-	    pass
+        self.pos = pos
+        self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE)
+        self.offsetLineCache = {}   # Table of (pathname, offset) -> (line, nextOffset)
+        self.rewind()
+        self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx")
+        try:
+            import shelve
+            self.indexCache = shelve.open(self.shelfname, 'r')
+        except:
+            pass
 
     def rewind(self):
-	self.file.seek(0)
-	while 1:
-	    offset = self.file.tell()
-	    line = self.file.readline()
-	    if (line[0] != ' '):
-		break
-	self.nextIndex = 0
-	self.nextOffset = offset
+        self.file.seek(0)
+        while 1:
+            offset = self.file.tell()
+            line = self.file.readline()
+            if (line[0] != ' '):
+                break
+        self.nextIndex = 0
+        self.nextOffset = offset
 
 #
 # Sequence protocol (an _IndexFile's items are its lines)
 #
-    def __nonzero__(self):
-	return 1
+    def __bool__(self):
+        return True
 
     def __len__(self):
-	if hasattr(self, 'indexCache'):
-	    return len(self.indexCache)
-	self.rewind()
-	lines = 0
-	while 1:
-	    line = self.file.readline()
-	    if line == "":
-		break
-	    lines = lines + 1
-	return lines
+        if hasattr(self, 'indexCache'):
+            return len(self.indexCache)
+        self.rewind()
+        lines = 0
+        while 1:
+            line = self.file.readline()
+            if line == "":
+                break
+            lines = lines + 1
+        return lines
 
-    def __nonzero__(self):
-	return 1
-
     def __getitem__(self, index):
-	if isinstance(index, StringType):
-	    if hasattr(self, 'indexCache'):
-		return self.indexCache[index]
-	    return binarySearchFile(self.file, index, self.offsetLineCache, 8)
-	elif isinstance(index, IntType):
-	    if hasattr(self, 'indexCache'):
-		return self.get(self.keys[index])
-	    if index < self.nextIndex:
-		self.rewind()
-	    while self.nextIndex <= index:
-		self.file.seek(self.nextOffset)
-		line = self.file.readline()
-		if line == "":
-		    raise IndexError, "index out of range"
-		self.nextIndex = self.nextIndex + 1
-		self.nextOffset = self.file.tell()
-	    return line
-	else:
-	    raise TypeError, "%s is not a String or Int" % `index`
-
+        if isinstance(index, StringType):
+            if hasattr(self, 'indexCache'):
+                return self.indexCache[index]
+            return binarySearchFile(self.file, index, self.offsetLineCache, 8)
+        elif isinstance(index, IntType):
+            if hasattr(self, 'indexCache'):
+                return self.get(self.keys()[index])
+            if index < self.nextIndex:
+                self.rewind()
+            while self.nextIndex <= index:
+                self.file.seek(self.nextOffset)
+                line = self.file.readline()
+                if line == "":
+                    raise IndexError("index out of range")
+                self.nextIndex = self.nextIndex + 1
+                self.nextOffset = self.file.tell()
+            return line
+        else:
+            raise TypeError("%s is not a String or Int" % repr(index))
 
 #
 # Dictionary protocol
@@ -1009,62 +1009,62 @@ class _IndexFile:
 #
 
     def get(self, key, default=None):
-	try:
-	    return self[key]
-	except LookupError:
-	    return default
+        try:
+            return self[key]
+        except LookupError:
+            return default
 
     def keys(self):
-	if hasattr(self, 'indexCache'):
-	    keys = self.indexCache.keys()
-	    keys.sort()
-	    return keys
-	else:
-	    keys = []
-	    self.rewind()
-	    while 1:
-		line = self.file.readline()
-		if not line: break
-		key = line.split(' ', 1)[0]
-		keys.append(key.replace('_', ' '))
-	    return keys
+        if hasattr(self, 'indexCache'):
+            keys = list(self.indexCache.keys())
+            keys.sort()
+            return keys
+        else:
+            keys = []
+            self.rewind()
+            while 1:
+                line = self.file.readline()
+                if not line: break
+                key = line.split(' ', 1)[0]
+                keys.append(key.replace('_', ' '))
+            return keys
 
     def has_key(self, key):
-	key = key.replace(' ', '_') # test case: V['haze over']
-	if hasattr(self, 'indexCache'):
-	    return self.indexCache.has_key(key)
-	return self.get(key) != None
+        key = key.replace(' ', '_') # test case: V['haze over']
+        if hasattr(self, 'indexCache'):
+            return key in self.indexCache
+        return self.get(key) != None
 
 #
 # Index file
 #
 
     def _buildIndexCacheFile(self):
-	import shelve
-	import os
-	print "Building %s:" % (self.shelfname,),
-	tempname = self.shelfname + ".temp"
-	try:
-	    indexCache = shelve.open(tempname)
-	    self.rewind()
-	    count = 0
-	    while 1:
-		offset, line = self.file.tell(), self.file.readline()
-		if not line: break
-		key = line[:string.find(line, ' ')]
-		if (count % 1000) == 0:
-		    print "%s..." % (key,),
-		    import sys
-		    sys.stdout.flush()
-		indexCache[key] = line
-		count = count + 1
-	    indexCache.close()
-	    os.rename(tempname, self.shelfname)
-	finally:
-	    try: os.remove(tempname)
-	    except: pass
-	print "done."
-	self.indexCache = shelve.open(self.shelfname, 'r')
+        import shelve
+        import os
+        print("Building %s:" % (self.shelfname,), end=' ')
+        tempname = self.shelfname + ".temp"
+        try:
+            indexCache = shelve.open(tempname)
+            self.rewind()
+            count = 0
+            while 1:
+                offset, line = self.file.tell(), self.file.readline()
+                if not line: break
+                key = line[:string.find(line, ' ')]
+                if (count % 1000) == 0:
+                    print("%s..." % (key,), end=' ')
+                    import sys
+                    sys.stdout.flush()
+                indexCache[key] = line
+                count = count + 1
+            indexCache.close()
+            os.rename(tempname, self.shelfname)
+        finally:
+            try: os.remove(tempname)
+            except: pass
+        print("done.")
+        self.indexCache = shelve.open(self.shelfname, 'r')
 
 
 #
@@ -1091,20 +1091,20 @@ getword, getsense, getsynset = getWord, getSense, getS
 
 def _requirePointerType(pointerType):
     if pointerType not in POINTER_TYPES:
-	raise TypeError, `pointerType` + " is not a pointer type"
+        raise TypeError(repr(pointerType) + " is not a pointer type")
     return pointerType
 
 def _compareInstances(a, b, fields):
     """"Return -1, 0, or 1 according to a comparison first by type,
     then by class, and finally by each of fields.""" # " <- for emacs
     if not hasattr(b, '__class__'):
-	return cmp(type(a), type(b))
+        return cmp(type(a), type(b))
     elif a.__class__ != b.__class__:
-	return cmp(a.__class__, b.__class__)
+        return cmp(a.__class__, b.__class__)
     for field in fields:
-	diff = cmp(getattr(a, field), getattr(b, field))
-	if diff:
-	    return diff
+        diff = cmp(getattr(a, field), getattr(b, field))
+        if diff:
+            return diff
     return 0
 
 def _equalsIgnoreCase(a, b):
@@ -1122,14 +1122,14 @@ def _equalsIgnoreCase(a, b):
 #
 def _dataFilePathname(filenameroot):
     if os.name in ('dos', 'nt'):
-	path = os.path.join(WNSEARCHDIR, filenameroot + ".dat")
+        path = os.path.join(WNSEARCHDIR, filenameroot + ".dat")
         if os.path.exists(path):
            return path
     return os.path.join(WNSEARCHDIR, "data." + filenameroot)
 
 def _indexFilePathname(filenameroot):
     if os.name in ('dos', 'nt'):
-	path = os.path.join(WNSEARCHDIR, filenameroot + ".idx")
+        path = os.path.join(WNSEARCHDIR, filenameroot + ".idx")
        if os.path.exists(path):
           return path
     return os.path.join(WNSEARCHDIR, "index." + filenameroot)
@@ -1146,30 +1146,30 @@ def binarySearchFile(file, key, cache={}, cacheDepth=-
         #if count > 20:
         #    raise "infinite loop"
         lastState = start, end
-	middle = (start + end) / 2
-	if cache.get(middle):
-	    offset, line = cache[middle]
-	else:
-	    file.seek(max(0, middle - 1))
-	    if middle > 0:
-		file.readline()
-	    offset, line = file.tell(), file.readline()
-	    if currentDepth < cacheDepth:
-		cache[middle] = (offset, line)
+        middle = (start + end) // 2
+        if cache.get(middle):
+            offset, line = cache[middle]
+        else:
+            file.seek(max(0, middle - 1))
+            if middle > 0:
+                file.readline()
+            offset, line = file.tell(), file.readline()
+            if currentDepth < cacheDepth:
+                cache[middle] = (offset, line)
         #print start, middle, end, offset, line,
-	if offset > end:
-	    assert end != middle - 1, "infinite loop"
-	    end = middle - 1
-	elif line[:keylen] == key:# and line[keylen + 1] == ' ':
-	    return line
+        if offset > end:
+            assert end != middle - 1, "infinite loop"
+            end = middle - 1
+        elif line[:keylen] == key:# and line[keylen + 1] == ' ':
+            return line
         #elif offset == end:
        #    return None
-	elif line > key:
-	    assert end != middle - 1, "infinite loop"
-	    end = middle - 1
-	elif line < key:
-	    start = offset + len(line) - 1
-	currentDepth = currentDepth + 1
+        elif line > key:
+            assert end != middle - 1, "infinite loop"
+            end = middle - 1
+        elif line < key:
+            start = offset + len(line) - 1
+        currentDepth = currentDepth + 1
         thisState = start, end
         if lastState == thisState:
             # detects the condition where we're searching past the end
@@ -1198,12 +1198,12 @@ def _index(key, sequence, testfn=None, keyfn=None):
     """
     index = 0
     for element in sequence:
-	value = element
-	if keyfn:
-	    value = keyfn(value)
-	if (not testfn and value == key) or (testfn and testfn(value, key)):
-	    return index
-	index = index + 1
+        value = element
+        if keyfn:
+            value = keyfn(value)
+        if (not testfn and value == key) or (testfn and testfn(value, key)):
+            return index
+        index = index + 1
     return None
 
 def _partition(sequence, size, count):
@@ -1216,7 +1216,7 @@ def _partition(sequence, size, count):
 
     partitions = []
     for index in range(0, size * count, size):
-	partitions.append(sequence[index:index + size])
+        partitions.append(sequence[index:index + size])
     return (partitions, sequence[size * count:])
 
 
@@ -1261,49 +1261,49 @@ class _LRUCache:
     but the two implementations aren't directly comparable."""
 
     def __init__(this, capacity):
-	this.capacity = capacity
-	this.clear()
+        this.capacity = capacity
+        this.clear()
 
     def clear(this):
-	this.values = {}
-	this.history = {}
-	this.oldestTimestamp = 0
-	this.nextTimestamp = 1
+        this.values = {}
+        this.history = {}
+        this.oldestTimestamp = 0
+        this.nextTimestamp = 1
 
     def removeOldestEntry(this):
-	while this.oldestTimestamp < this.nextTimestamp:
-	    if this.history.get(this.oldestTimestamp):
-		key = this.history[this.oldestTimestamp]
-		del this.history[this.oldestTimestamp]
-		del this.values[key]
-		return
-	    this.oldestTimestamp = this.oldestTimestamp + 1
+        while this.oldestTimestamp < this.nextTimestamp:
+            if this.history.get(this.oldestTimestamp):
+                key = this.history[this.oldestTimestamp]
+                del this.history[this.oldestTimestamp]
+                del this.values[key]
+                return
+            this.oldestTimestamp = this.oldestTimestamp + 1
 
     def setCapacity(this, capacity):
-	if capacity == 0:
-	    this.clear()
-	else:
-	    this.capacity = capacity
-	    while len(this.values) > this.capacity:
-		this.removeOldestEntry()
+        if capacity == 0:
+            this.clear()
+        else:
+            this.capacity = capacity
+            while len(this.values) > this.capacity:
+                this.removeOldestEntry()
 
     def get(this, key, loadfn=None):
-	value = None
-	if this.values:
-	    pair = this.values.get(key)
-	    if pair:
-		(value, timestamp) = pair
-		del this.history[timestamp]
-	if value == None:
-	    value = loadfn and loadfn()
-	if this.values != None:
-	    timestamp = this.nextTimestamp
-	    this.nextTimestamp = this.nextTimestamp + 1
-	    this.values[key] = (value, timestamp)
-	    this.history[timestamp] = key
-	    if len(this.values) > this.capacity:
-		this.removeOldestEntry()
-	return value
+        value = None
+        if this.values:
+            pair = this.values.get(key)
+            if pair:
+                (value, timestamp) = pair
+                del this.history[timestamp]
+        if value == None:
+            value = loadfn and loadfn()
+        if this.values != None:
+            timestamp = this.nextTimestamp
+            this.nextTimestamp = this.nextTimestamp + 1
+            this.values[key] = (value, timestamp)
+            this.history[timestamp] = key
+            if len(this.values) > this.capacity:
+                this.removeOldestEntry()
+        return value
 
 
 class _NullCache:
@@ -1311,10 +1311,10 @@ class _NullCache:
     LRUCache implements), but doesn't store any values."""
 
-    def clear():
-	pass
+    def clear(this):
+        pass
 
     def get(this, key, loadfn=None):
-	return loadfn and loadfn()
+        return loadfn and loadfn()
 
 
 DEFAULT_CACHE_CAPACITY = 1000
@@ -1327,7 +1327,7 @@ def disableCache():
 
 def enableCache():
     """Enable the entity cache."""
-    if not isinstance(_entityCache, LRUCache):
-	_entityCache = _LRUCache(size)
+    if not isinstance(_entityCache, _LRUCache):
+        _entityCache = _LRUCache(DEFAULT_CACHE_CAPACITY)
 
 def clearCache():
     """Clear the entity cache."""
@@ -1365,36 +1365,36 @@ def _initializePOSTables():
     _POSNormalizationTable = {}
     _POStoDictionaryTable = {}
     for pos, abbreviations in (
-	(NOUN, "noun n n."),
-	(VERB, "verb v v."),
-	(ADJECTIVE, "adjective adj adj. a s"),
-	(ADVERB, "adverb adv adv. r")):
-	tokens = string.split(abbreviations)
-	for token in tokens:
-	    _POSNormalizationTable[token] = pos
-	    _POSNormalizationTable[string.upper(token)] = pos
+        (NOUN, "noun n n."),
+        (VERB, "verb v v."),
+        (ADJECTIVE, "adjective adj adj. a s"),
+        (ADVERB, "adverb adv adv. r")):
+        tokens = string.split(abbreviations)
+        for token in tokens:
+            _POSNormalizationTable[token] = pos
+            _POSNormalizationTable[string.upper(token)] = pos
     for dict in Dictionaries:
-	_POSNormalizationTable[dict] = dict.pos
-	_POStoDictionaryTable[dict.pos] = dict
+        _POSNormalizationTable[dict] = dict.pos
+        _POStoDictionaryTable[dict.pos] = dict
 
 _initializePOSTables()
 
 def _normalizePOS(pos):
     norm = _POSNormalizationTable.get(pos)
     if norm:
-	return norm
-    raise TypeError, `pos` + " is not a part of speech type"
+        return norm
+    raise TypeError(repr(pos) + " is not a part of speech type")
 
 def _dictionaryFor(pos):
     pos = _normalizePOS(pos)
    dict = _POStoDictionaryTable.get(pos)
    if dict == None:
-	raise RuntimeError, "The " + `pos` + " dictionary has not been created"
+        raise RuntimeError("The " + repr(pos) + " dictionary has not been created")
    return dict
 
 def buildIndexFiles():
     for dict in Dictionaries:
-	dict._buildIndexCacheFile()
+        dict._buildIndexCacheFile()
 
 
 #
@@ -1404,7 +1404,7 @@ def buildIndexFiles():
 
 def _testKeys():
     #This is slow, so don't do it as part of the normal test procedure.
     for dictionary in Dictionaries:
-	dictionary._testKeys()
+        dictionary._testKeys()
 
 def _test(reset=0):
     import doctest, wordnet
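
A quick smoke test of the ported API, a minimal sketch rather than part of the patch: it assumes the patched wordnet.py and wntools.py are importable and that WNHOME/WNSEARCHDIR point at a WordNet 2.0 database directory; run it under Python 3 after applying the patch.

    from wordnet import N, V, HYPERNYM
    from wntools import closure

    dog = N['dog']                     # Dictionary.__getitem__ with a string key
    print(dog, len(dog))               # dog(n.) and its sense count (sequence protocol)
    print(bool(N))                     # True, via the new __bool__ (no index scan)
    print(N.get('inu'))                # None instead of an uncaught KeyError
    print(V.has_key('haze over'))      # multiword forms still map to '_'-joined keys
    print(closure(dog[0], HYPERNYM))   # hypernym chain of the first sense

The membership-style rewrites (form in dictionary) exercise _IndexFile.has_key through Dictionary, which is why the patch converts both layers together.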