root/trunk/Scripts/Contributed/FontLabTokenize.py

Revision 25, 2.4 kB (checked in by erik, 3 years ago)

Added >>> for the benefit of the doctests.

Line 
1 # -*- coding: utf-8 -*-
2
3 """FontLab Tokenize
4
5 Tokenize FontLab’s preview/metrics text into single characters,
6 respecting escaped glyph names (e.g. “/A.smcp”), and provide a
7 lossless reverse function. Sample usage (and actual test suite):
8
9 >>> tokenize('/A/B/C')
10 ['/A', '/B', '/C']
11 >>> tokenize('abcde/B/C')
12 ['a', 'b', 'c', 'd', 'e', '/B', '/C']
13 >>> tokenize('foo/A.smcp/B.smcp abc')
14 ['f', 'o', 'o', '/A.smcp', '/B.smcp', 'a', 'b', 'c']
15 >>> p = ['f', 'o', 'o', '/A.smcp', '/B.smcp', 'a', 'b', 'c']
16 >>> serialize(p)
17 'foo/A.smcp/B.smcp abc'
18 >>> tokenize('/a /b /c')
19 ['/a', '/b', '/c']
20 >>> tokenize('/a/b c')
21 ['/a', '/b', 'c']
22 >>> tokenize('@a@b@')
23 ['@', 'a', '@', 'b', '@']
24 >>> tokenize('abc def ghi ')
25 ['a', 'b', 'c', ' ', 'd', 'e', 'f', ' ', 'g', 'h', 'i', ' ']
26 >>> p = ['a', 'b', 'c', ' ', 'd', 'e', 'f', ' ', 'g', 'h', 'i', ' ']
27 >>> serialize(p)
28 'abc def ghi '
29 >>> serialize(['/a', 'b', '/c', 'd'])
30 '/a b/c d'
31 """
32
33 __author__ = 'Antonio Cavedoni <http://cavedoni.com/>'
34 __version__ = '0.1'
35 __svnid__ = '$Id$'
36 __license__ = 'Python'
37
def tokenize(input):
    """Split FontLab preview/metrics text into a list of tokens.

    Plain characters become one token each. A slash starts an escaped
    glyph name, which runs until the next slash, the next space or the
    end of the text and is returned as a single token including its
    leading slash. The space that terminates a glyph name is a delimiter
    and is not returned as a token; any other space is an ordinary
    character token.

    A slash with nothing after it (for example at the very end of the
    text) is kept as the token '/' rather than being dropped.

    >>> tokenize('a/')
    ['a', '/']
    >>> tokenize('/a ')
    ['/a']

    input -- the text to tokenize (a string, or any iterable of
             single characters).

    Returns a list of strings.
    """
    tokens = []
    # Characters of the glyph name currently being read, leading slash
    # included; an empty list means we are outside a glyph name.
    escaped = []
    for x in input:
        if x == '/':
            # A slash always opens a new glyph name, so flush the one
            # in progress (if any) before starting over.
            if escaped:
                tokens.append("".join(escaped))
            escaped = [x]
        elif not escaped:
            tokens.append(x)
        elif x == ' ':
            # The space ends the glyph name and is consumed as the
            # delimiter, wherever in the text it occurs.
            tokens.append("".join(escaped))
            escaped = []
        else:
            escaped.append(x)

    # Flush whatever is still pending once the text runs out, so that a
    # trailing glyph name or a lone trailing slash is not lost.
    if escaped:
        tokens.append("".join(escaped))

    return tokens
67
def serialize(tokens):
    """Join tokens back into FontLab preview/metrics text.

    Tokens are concatenated in order. Whenever a glyph-name token (one
    starting with a slash) is followed by a token that does not start
    with a slash, a single space is inserted between them so that the
    glyph name is terminated in the resulting text.

    tokens -- an iterable of strings (a list, tuple, iterator or
              generator); it is walked exactly once.

    Returns the serialized string; an empty iterable gives ''.
    """
    series = []
    previous = None  # the token emitted on the previous iteration
    for token in tokens:
        # Looking back at the previous token instead of ahead at the
        # next one avoids indexing, so one-shot iterables work too.
        if (previous is not None and previous.startswith('/')
                and not token.startswith('/')):
            series.append(' ')
        series.append(token)
        previous = token

    return "".join(series)
81
if __name__ == "__main__":
    # Run as a script: execute the docstring examples as the test suite.
    import doctest

    # testmod() returns a (failed, attempted) pair; report failures
    # through the exit status instead of always exiting with 0.
    failed, _attempted = doctest.testmod()
    if failed:
        raise SystemExit(1)
Note: See TracBrowser for help on using the browser.