| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
"""FontLab Tokenize |
|---|
| 4 |
|
|---|
| 5 |
Tokenize FontLab's preview/metrics text into single characters |
|---|
| 6 |
respecting escaped glyph names (e.g. '/A.smcp') and providing a |
|---|
| 7 |
lossless reverse function. Sample usage (and actual test suite): |
|---|
| 8 |
|
|---|
| 9 |
>>> tokenize('/A/B/C') |
|---|
| 10 |
['/A', '/B', '/C'] |
|---|
| 11 |
>>> tokenize('abcde/B/C') |
|---|
| 12 |
['a', 'b', 'c', 'd', 'e', '/B', '/C'] |
|---|
| 13 |
>>> tokenize('foo/A.smcp/B.smcp abc') |
|---|
| 14 |
['f', 'o', 'o', '/A.smcp', '/B.smcp', 'a', 'b', 'c'] |
|---|
| 15 |
>>> p = ['f', 'o', 'o', '/A.smcp', '/B.smcp', 'a', 'b', 'c'] |
|---|
| 16 |
>>> serialize(p) |
|---|
| 17 |
'foo/A.smcp/B.smcp abc' |
|---|
| 18 |
>>> tokenize('/a /b /c') |
|---|
| 19 |
['/a', '/b', '/c'] |
|---|
| 20 |
>>> tokenize('/a/b c') |
|---|
| 21 |
['/a', '/b', 'c'] |
|---|
| 22 |
>>> tokenize('@a@b@') |
|---|
| 23 |
['@', 'a', '@', 'b', '@'] |
|---|
| 24 |
>>> tokenize('abc def ghi ') |
|---|
| 25 |
['a', 'b', 'c', ' ', 'd', 'e', 'f', ' ', 'g', 'h', 'i', ' '] |
|---|
| 26 |
>>> p = ['a', 'b', 'c', ' ', 'd', 'e', 'f', ' ', 'g', 'h', 'i', ' '] |
|---|
| 27 |
>>> serialize(p) |
|---|
| 28 |
'abc def ghi ' |
|---|
| 29 |
>>> serialize(['/a', 'b', '/c', 'd']) |
|---|
| 30 |
'/a b/c d' |
|---|
| 31 |
""" |
|---|
| 32 |
|
|---|
| 33 |
# Module metadata; the values are informational only and are not read by
# tokenize() or serialize().
__author__ = 'Antonio Cavedoni <http://cavedoni.com/>'
__version__ = '0.1'
# NOTE(review): presumably a version-control keyword placeholder that is
# expanded on checkout -- confirm before relying on its value.
__svnid__ = '$Id$'
__license__ = 'Python'
|---|
| 37 |
|
|---|
| 38 |
def tokenize(input):
    """Split FontLab preview/metrics text into a list of tokens.

    Every ordinary character becomes its own one-character token. A '/'
    starts an escaped glyph name, which runs until the next '/', the next
    space, or the end of the text, and is emitted as a single token that
    keeps its leading '/'. The space that terminates a glyph name is a
    separator only and is never emitted as a token (nor glued onto the
    name, even when it is the very last character of the text).

    Args:
        input: The text to split. The name shadows the builtin but is
            kept so that existing keyword callers continue to work.

    Returns:
        A list of strings (single characters and '/name' tokens) in the
        order they appear in the text. An empty text yields an empty list.

    >>> tokenize('/a ')
    ['/a']
    >>> tokenize('ab/')
    ['a', 'b', '/']
    """
    tokens = []
    escaped = []  # characters of the glyph name being collected, if any
    for char in input:
        if char == '/':
            # A slash always opens a new glyph name; finish the one that
            # was in progress first.
            if escaped:
                tokens.append("".join(escaped))
            escaped = [char]
        elif not escaped:
            # Plain text outside of any glyph name.
            tokens.append(char)
        elif char == ' ':
            # The space ends the glyph name and is consumed as separator.
            tokens.append("".join(escaped))
            escaped = []
        else:
            escaped.append(char)

    # Flush a glyph name (or a lone '/') that runs to the end of the text;
    # doing it here instead of special-casing the last index means no
    # trailing token is ever dropped.
    if escaped:
        tokens.append("".join(escaped))

    return tokens
|---|
| 67 |
|
|---|
| 68 |
def serialize(tokens):
    """Join tokens back into FontLab preview/metrics text.

    Tokens are concatenated in order. A token that starts with '/' (an
    escaped glyph name) gets a single space appended when the token after
    it does not start with '/', so that the following text is not read as
    part of the glyph name. No space is added after the last token or
    between two consecutive '/' tokens.

    Args:
        tokens: An iterable of string tokens, such as a list of single
            characters and '/name' entries. It is materialised into a
            list so that one-shot iterators are accepted as well.

    Returns:
        The serialized text; an empty string when there are no tokens.
    """
    tokens = list(tokens)
    last_index = len(tokens) - 1
    pieces = []
    for index, token in enumerate(tokens):
        needs_separator = (
            token.startswith('/')
            and index != last_index
            and not tokens[index + 1].startswith('/')
        )
        # The separator space keeps the next plain token from being
        # absorbed into this glyph name when the text is parsed again.
        pieces.append(token + ' ' if needs_separator else token)

    return "".join(pieces)
|---|
| 81 |
|
|---|
| 82 |
if __name__ == "__main__":
    # Running the module as a script executes the docstring examples as
    # its test suite.
    import doctest
    doctest.testmod()
|---|