#
# deen.icn - give English equivalents of German words
# example from the Unicon book, chapter 11
#
# Usage: deen [-all] word...
#    -all   print every definition found for each word instead of the first
#

$define DEEN "http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/de-en.txt.gz"

#
# Look up each command-line word in the German-English dictionary and
# write its definition(s), or a "not in the dictionary" message.
#
procedure main(av)
   local all, dd, s, lu

   # av[1] fails on an empty argument list, so the test is simply skipped.
   if av[1] == "-all" then all := pop(av)
   dd := DeenDictionary()
   every s := !av do {
      if lu := dd.lookup(s) then {
         if \all then
            every write(s, ": ", dd.lookup(s).definition)
         else
            write(s, ": ", lu.definition)
         }
      else
         write(s, " is not in the dictionary.")
      }
end

#
# A buffer holds the contents of a file as a list of lines.
#   filename - name (or http:// URL) of the file to load
#   text     - list of lines, filled in by read()
#
class buffer(filename, text)

   #
   # Read the file named by filename into text, one list element per line.
   # Names starting with "http://" are opened in messaging mode ("m").
   # Names ending in ".gz" are decompressed by running gunzip; note that
   # the compressed and decompressed names are hard-coded to
   # de-en.txt.gz / de-en.txt rather than derived from filename.
   # Stops the program if a file cannot be opened.
   #
   method read()
      local mode, f, f2, s

      if match("http://", filename) then mode := "m" else mode := "r"
      f := open(filename, mode) | stop("can't open ", image(filename))
      if filename[-3:0] == ".gz" then {
         if mode == "m" then {
            # download to a local file
            if not (f2 := open("de-en.txt.gz", "w")) then stop("can't write")
            while s := reads(f, 1000000) do writes(f2, s)
            close(f)
            close(f2)
            }
         else {
            # The compressed file is not read through this handle; close it
            # before f is reassigned below so the handle is not leaked.
            close(f)
            }
         system("gunzip de-en.txt.gz")
         f := open("de-en.txt") | stop("can't read de-en.txt")
         }
      writes("Opened ", image(f), ".\nReading")
      text := [ ]
      # Progress indicator: one dot per 1000 lines read.
      every put(text, !f) do
         if *text % 1000 = 0 then writes(".")
      close(f)
      write("\ndone. Read ", *text, " lines")
      return
   end

   # Placeholder; not implemented.
   method erase()
      #... ?
   end

   # ...additional buffer operations

initially
   # Load immediately when the constructor was given a file name.
   if \filename then read()
end

#
# A buftable is a buffer whose text is a table keyed by the leading
# run of letters on each line.
#
class buftable : buffer()

   #
   # Read the file as a buffer, then replace text with a table mapping
   # each line's leading word to the line.  Stops the program on a line
   # that does not start with a letter.
   #
   method read()
      local tmp, line, word

      self.buffer.read()
      tmp := table()
      every line := !text do {
         line ? {
            word := tab(many(&letters)) | stop("failed on ", image(line))
            tmp[word] := line
            }
         }
      text := tmp
      return
   end

   #
   # Generate the elements of the entry stored under s; fails if there
   # is no entry for s.
   #
   method lookup(s)
      suspend ! \ (text[s])
   end
end

#
# One dictionary entry, split into its components.
#
class dictionaryentry(word, part, etymology, definition)

   # decode a dictionary entry into its components
   # assumed format is word;pos;etym;def
   method decode(s)
      s ? {
         word := tab(find(";"))
         move(1)
         part := tab(find(";"))
         move(1)
         etymology := tab(find(";"))
         move(1)
         definition := tab(0)
         }
   end

   # encode a dictionary entry into a string
   method encode()
      return word || ";" || part || ";" || etymology || ";" || definition
   end

initially
   if /part then # constructor was called with a single string argument
      decode(word)
end

#
# A dictionary is a buftable whose table values are dictionaryentry
# objects rather than raw lines.
#
class dictionary : buftable()

   #
   # Read the file as a buffer, then replace text with a table mapping
   # each line's leading word to a dictionaryentry built from the line.
   # Fails if an entry cannot be constructed.
   #
   method read()
      local tmp, line, word

      self.buffer.read()
      tmp := table()
      every line := !text do
         line ? {
            word := tab(many(&letters)) | stop("failed on ", image(line))
            tmp[word] := dictionaryentry(line) | fail
            }
      text := tmp
      # Succeed explicitly, as buffer.read() and buftable.read() do.
      return
   end

   #
   # Write every entry back to filename in encoded form, one per line.
   # Fails if the file cannot be opened for writing.
   #
   method Write()
      local f

      f := open(filename, "w") | fail
      # encode must be invoked; without the parentheses the method value
      # itself, not the encoded string, would be handed to write().
      every write(f, (!text).encode())
      close(f)
      # Succeed explicitly so callers can tell success from the open failure.
      return
   end
end

#
# A German-English entry: a dictionaryentry with an added gender field.
# Constructed from the German text (de) and the English text (en).
#
class DeenEntry : dictionaryentry(gender)

   # Overrides dictionaryentry.decode with a no-op.
   method decode(s)
      # write("subentry decode ", image(s))
   end

initially(de, en)
   de ? {
      if word := trim(tab(find("{")),,0) then {
         # gender is the text between the braces
         ="{"
         gender := tab(find("}"))
         }
      else {
         # here is one without gender info
         word := trim(tab(find("[")|0),,0)
         gender := "?"
         }
      }
   definition := en
end

#
# Return a list of dictionary entries for a given line of text
#
# The line has the form  german :: english.  Both sides may hold several
# "|"-separated alternatives, and each alternative may hold several
# ";"-separated synonyms; German and English parts are paired by position.
# Stops the program if the line has no "::".
#
procedure get_entries(s)
   local subentries, deutsch, english, deutschwort, englishword
   local dword, eword, gronk

   subentries := []
   s ? {
      deutsch := tab(find("::")) | stop("no :: in ", image(s))
      ="::"; tab(many(' '))
      english := tab(0)
      deutsch ? {
         while *(deutschwort := tab(find("|") | 0)) > 0 do {
            deutschwort := trim(deutschwort,,0)
            ="|"
            tab(many(' '))
            # Take the matching English alternative and strip it, together
            # with its "|" separator, from the front of english.
            englishword := trim(english[1:find("|",english)|0],,0)
            english ?:= {
               tab(many(' \t'))
               =englishword
               tab(many(' \t'))
               ="|"
               tab(many(' \t'))
               tab(0)
               }
            if find(";", deutschwort) then {
               deutschwort ? {
                  while *(dword := tab(find(";") | 0)) > 0 do {
                     =";"
                     tab(many(' '))
                     # eword is not reset between iterations: when the
                     # remaining English text yields an empty prefix, the
                     # value from the previous synonym is reused.
                     if gronk := englishword[1:upto(';|', englishword)|0] then {
                        if *gronk > 0 then {
                           eword := gronk
                           }
                        }
                     if /eword then
                        stop("botched eword for dword ",image(dword)," in\n",
                             s,"\n with remaining english of ",
                             image(englishword))
                     # NOTE(review): eword is only ever assigned a non-empty
                     # gronk above, so this check cannot currently fire.
                     if eword === "" then
                        write("empty eword for ", dword, " in:\n", s)
                     put(subentries, DeenEntry(dword, eword))
                     englishword ?:= { =eword; =";"; tab(many(' ')); tab(0)}
                     }
                  }
               }
            else {
               put(subentries, DeenEntry(deutschwort, englishword))
               }
            }
         }
      }
   return subentries
end

#
# The German-English dictionary: text maps each German word to a list
# of DeenEntry objects.
#
class DeenDictionary : dictionary()

   #
   # Read the file as a buffer, skip lines that start with "#" and empty
   # lines, split every other line into entries with get_entries(), and
   # replace text with a table mapping each word to its list of entries.
   #
   method read()
      local tmp, line, L, x

      self.buffer.read()
      tmp := table()
      every line := !text do
         line ? {
            if ="#" | line=="" then next
            if not (L := get_entries(line)) then
               stop("get_entries failing on ", image(line))
            every x := !L do {
               if not member(tmp, x.word) then tmp[x.word] := [x]
               else put(tmp[x.word], x)
               }
            }
      text := tmp
      # Succeed explicitly, as buffer.read() and buftable.read() do.
      return
   end

initially
   # Prefer an already-decompressed local copy, then a local compressed
   # copy, and only then download from DEEN.
   if stat("de-en.txt") then filename := "de-en.txt"
   else if stat("de-en.txt.gz") then filename := "de-en.txt.gz"
   else filename := DEEN
   self.read()
end