#!/usr/bin/env python2.2
# unisearch - quick & dirty script to grep through the unicode character
# charts, outputting UTF-8 for terminal or webpage display
# 2-dec-2003
# dan sandler - http://dsandler.org

import sys,os,codecs

# fetch this from
#   ftp://ftp.unicode.org/Public/4.0-Update1/UnicodeData-4.0.1d1b.txt
# then compress it (it's quite large)

# MODIFY THIS:
# DB is the path of the UnicodeData file to search; it is interpolated into
# the shell command that feeds grep.
DB = '/home/dsandler/share/UnicodeData-4.0.1d1b.txt.gz'
# truthy: DB is decompressed on the fly with `gzip -dc`;
# falsy:  DB is read as-is with `cat`.
DB_COMPRESSED = 1

# -- ok, don't modify this. unless you really feel the need --

# encoder used to turn each matched character into UTF-8 bytes for output
utf_8_encoder = codecs.getencoder("utf-8")


def shell_quote(text):
    """Return text as a single, literal /bin/sh word.

    The text is wrapped in single quotes, inside which the shell performs no
    expansion at all; an embedded single quote is emitted as '\\'' (close the
    quote, escaped quote, reopen the quote).
    """
    return "'" + text.replace("'", "'\\''") + "'"


def build_command(db, compressed, query):
    """Return the shell pipeline that greps the file db for query.

    db         -- path of the UnicodeData file
    compressed -- truthy if db must be read through `gzip -dc`, else `cat`
    query      -- grep pattern, matched case-insensitively

    Both db and query are shell-quoted, and the pattern is passed with -e so
    that a query starting with '-' is not mistaken for a grep option.
    """
    if compressed:
        reader = 'gzip -dc'
    else:
        reader = 'cat'
    return '%s %s | grep -i -e %s' % (
        reader, shell_quote(db), shell_quote(query))


def code_point_to_utf8(code_point):
    """Return the UTF-8 encoding of the character numbered code_point.

    Raises ValueError (or OverflowError) if code_point is not a valid
    code point.
    """
    try:
        char = unichr(code_point)
    except ValueError:
        if code_point < 0x10000 or code_point > 0x10FFFF:
            raise
        # narrow (UCS-2) builds reject code points above U+FFFF in unichr();
        # build the surrogate pair by hand, which the UTF-8 codec encodes as
        # a single 4-byte sequence.
        offset = code_point - 0x10000
        char = (unichr(0xD800 + (offset >> 10))
                + unichr(0xDC00 + (offset & 0x3FF)))
    return utf_8_encoder(char)[0]


def format_entry(line):
    """Return the display line for one UnicodeData record, or None.

    line is a ';'-separated record whose first field is the code point in
    hex and whose second field is the character name.  The result has the
    form 'U<HEX> : <char as UTF-8> : <NAME>\\n'.  None is returned for a line
    that does not have that shape, so one bad line cannot abort the search.
    """
    fields = line.strip().upper().split(';')
    if len(fields) < 2:
        return None
    try:
        glyph = code_point_to_utf8(long(fields[0], 16))
    except (ValueError, OverflowError):
        return None
    return 'U' + fields[0] + " : " + glyph + " : " + fields[1] + '\n'


def main(argv):
    """Search DB for the words in argv[1:] and print every match to stdout.

    The arguments are joined with single spaces to form one grep pattern.
    With no arguments (empty query) nothing is run and nothing is printed.
    """
    query = " ".join(argv[1:])
    if len(query) == 0:
        return

    pipe = os.popen(build_command(DB, DB_COMPRESSED, query))
    try:
        for line in pipe.readlines():
            entry = format_entry(line)
            if entry is not None:
                sys.stdout.write(entry)
    finally:
        # always reap the child pipeline, even if writing output failed
        pipe.close()


if __name__ == '__main__':
    main(sys.argv)