1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
#! /usr/bin/perl -w
# WordNet dictionary database converter
#
# Converts the WordNet Prolog data to Rockbox dictionary format.
#
# Written by Miika Pekkarinen <slasher@ihme.org>
#
# $Id$
use strict;
# Lookup tables
# %words: lower-cased word => { synset id => first character of that id }
my %words;
# %descriptions: synset id => description text read from the g(...) records
my %descriptions;
# Translate a numeric category id into the one-letter tag used in the output.
#   1 => 'N', 2 => 'V', 3 => 'A', 4 => 'A'; any other number gives '?'.
# NOTE(review): ids 3 and 4 share the tag 'A' -- confirm this is intended.
sub getcatname {
    my $catnum = shift;

    return $catnum == 1                 ? 'N'
         : $catnum == 2                 ? 'V'
         : $catnum == 3 || $catnum == 4 ? 'A'
         :                                '?';
}
# Open the two Prolog input files for reading and the pre-parsed output file
# for writing.  The three-argument form of open keeps the mode separate from
# the file name instead of embedding it in the name string.  The bareword
# handle names are kept because the rest of the script uses them by name.
open(IN_WORD, '<', 'wn_s.pl')        or die "Open fail(#1): $!";
open(IN_DESC, '<', 'wn_g.pl')        or die "Open fail(#2): $!";
open(OUTPUT,  '>', 'dict.preparsed') or die "Open fail(#3): $!";
print "Reading word file...\n";
# Read every s(...) record into %words, keyed by the lower-cased word.
while (my $line = <IN_WORD>) {
    chomp $line;
    # A blank line carries no record; skip it rather than storing an
    # entry with an empty word and an empty id.
    next unless $line =~ /\S/;
    # s(100001740,1,'entity',n,1,11). => 100001740,1,'entity',n,1,11
    $line =~ s/^s\((.*)\)\.$/$1/;
    # Only the first field (synset id) and the third (quoted word) are used.
    my ($seqid, undef, $word) = split /,/, $line, 6;
    # 'entity' => entity
    $word =~ s/^'(.*)'$/$1/;
    # A quote inside the quoted word is written doubled; undo every
    # occurrence, not just the first ('rock ''n'' roll' => rock 'n' roll).
    $word =~ s/''/'/g;
    # The first character of the synset id is stored as the category.
    my $category = substr $seqid, 0, 1;
    $words{lc $word}{$seqid} = $category;
}
close IN_WORD;
print "Reading description file...\n";
# Read every g(...) record into %descriptions, keyed by synset id.
while (my $line = <IN_DESC>) {
    chomp $line;
    # A blank line carries no record; skip it.
    next unless $line =~ /\S/;
    # g(100002056,'(a separate and self-contained entity)').
    # => 100002056,'(a separate and self-contained entity)'
    $line =~ s/^g\((.*)\)\.$/$1/;
    # Split on the first comma only: the description itself contains commas.
    my ($seqid, $desc) = split /,/, $line, 2;
    # '(a separate and self-contained entity)'
    # => a separate and self-contained entity
    $desc =~ s/^'\((.*)\)'$/$1/;
    # A quote inside the quoted text is written doubled; undo every
    # occurrence, not just the first.
    $desc =~ s/''/'/g;
    $descriptions{$seqid} = $desc;
}
close IN_DESC;
print "Sorting and writing output...\n";
# Now sort and find correct descriptions.  One output line is written per
# word: "<word>\t<cat>: 1. <desc> 2. <desc>\t<cat>: 1. <desc>...".
foreach my $word (sort keys %words) {
    my %categories;

    # Find all definitions of the word, grouped by category id.  The synset
    # ids are visited in sorted order so that the numbering of definitions
    # does not depend on hash ordering and is the same on every run.
    foreach my $id (sort keys %{ $words{$word} }) {
        my $catid       = $words{$word}{$id};
        my $description = $descriptions{$id};
        if (!defined($description) or $description eq '') {
            # Report the synset id that has no description.
            print "Error: Failed to link word: $word / $id\n";
            exit 1;
        }
        push @{ $categories{$catid} }, $description;
    }

    # 1 = noun
    # 2 = verb
    # 3 = adjective
    # 4 = adverb
    my @sections;
    for my $catid (1 .. 4) {
        next unless $categories{$catid};
        my @defs = @{ $categories{$catid} };
        # Number the definitions of this category starting from 1.
        my $catdesc = join ' ',
            map { ($_ + 1) . '. ' . $defs[$_] } 0 .. $#defs;
        push @sections, getcatname($catid) . ": $catdesc";
    }

    # Every word must have at least one definition in categories 1..4.
    die "Internal error" unless @sections;
    print OUTPUT "$word\t", join("\t", @sections), "\n";
}
# Buffered write errors only surface at close, so check it before
# reporting success.
close OUTPUT or die "Close fail: $!";
print "Done, output was successfully written!\n";
|