#!/usr/bin/env python

# unisearch - quick & dirty script to grep through the unicode character
# charts, outputting UTF-8 for terminal or webpage display
# 2-dec-2003
# dan sandler - http://dsandler.org
#
# Usage: unisearch WORD [WORD ...]
# The words are joined with single spaces and handed to `grep -i` as one
# pattern; every matching UnicodeData line is printed as
#     U<HEX CODE> : <the character itself, UTF-8 encoded> : <NAME>
#
# Requires Python 2.6+ or Python 3, plus `grep` (and `gzip` when the
# database is compressed) on the PATH.

import codecs
import os
import subprocess
import sys

# fetch this from
#   ftp://ftp.unicode.org/Public/4.0-Update1/UnicodeData-4.0.1d1b.txt
# then compress it (it's quite large)

# MODIFY THIS:
DB = '/home/dsandler/share/UnicodeData-4.0.1d1b.txt.gz'
DB_COMPRESSED = 1

# -- ok, don't modify this. unless you really feel the need --

utf_8_encoder = codecs.getencoder("utf-8")

try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3


def format_entry(line):
    """Format one UnicodeData line as a UTF-8 encoded output line.

    *line* is a ``;``-separated record (bytes or text) whose first field is
    the hexadecimal code point and whose second field is the character name.
    The whole record is upper-cased before formatting.

    Returns the bytes ``U<code> : <character> : <name>\\n``, or ``None`` when
    the line has fewer than two fields or its first field is not a
    hexadecimal number that maps to a character.
    """
    if isinstance(line, bytes):
        line = line.decode("utf-8", "replace")
    fields = line.strip().upper().split(";")
    if len(fields) < 2:
        return None
    code, name = fields[0], fields[1]
    try:
        char = _unichr(int(code, 16))
    except (ValueError, OverflowError):
        # Not hex, negative, or outside the range this interpreter supports.
        return None
    text = "U%s : %s : %s\n" % (code, char, name)
    # "replace" keeps lone surrogates (which Python 3 refuses to encode as
    # UTF-8) from aborting the whole listing.
    return utf_8_encoder(text, "replace")[0]


def search(query, db=DB, compressed=DB_COMPRESSED):
    """Yield the raw lines of *db* that ``grep -i`` matches against *query*.

    The commands are started with explicit argument lists and no shell, so
    quotes, ``$``, backticks and the like in *query* reach grep literally
    instead of being interpreted by ``sh``.  The pattern is passed with
    ``-e`` so a query starting with ``-`` is not mistaken for an option.

    When *compressed* is true the file is streamed through ``gzip -dc``
    first; otherwise grep reads the file directly.
    """
    grep_args = ["grep", "-i", "-e", query]
    unzip = None
    grep_input = None
    if compressed:
        unzip = subprocess.Popen(["gzip", "-dc", "--", db],
                                 stdout=subprocess.PIPE)
        grep_input = unzip.stdout
    else:
        grep_args += ["--", db]
    try:
        grep = subprocess.Popen(grep_args, stdin=grep_input,
                                stdout=subprocess.PIPE)
        if unzip is not None:
            # grep holds its own copy of the pipe now; dropping ours lets
            # gzip see a broken pipe if grep exits early.
            unzip.stdout.close()
        try:
            for line in grep.stdout:
                yield line
        finally:
            grep.stdout.close()
            grep.wait()
    finally:
        if unzip is not None:
            unzip.stdout.close()
            unzip.wait()


def main(argv=None):
    """Run the search for the words in *argv* and print the matches.

    *argv* is the list of query words (defaults to ``sys.argv[1:]``).  With
    no words, nothing is searched and nothing is printed.
    """
    if argv is None:
        argv = sys.argv[1:]
    query = " ".join(argv)
    if not query:
        return
    # Output is already UTF-8 encoded bytes: on Python 3 write to the
    # underlying binary stream, on Python 2 sys.stdout takes bytes directly.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for line in search(query):
        entry = format_entry(line)
        if entry is not None:
            out.write(entry)
    out.flush()


if __name__ == "__main__":
    main()