>>> filename = 'test.txt' >>> filename.endswith('.txt') True >>> filename.startswith('file://') False >>> url.startswith('http:') True >>> [name for name in filenames if name.endswith(('.gz', '.py'))] ['t1k_bot.tar.gz', 'jemalloc-5.3.0.tar.gz', 't1k_master.tar.gz', 't.py'] >>> any(name.endswith(('.pcap', '.py')) for name in filenames) True
1 2 3 4 5 6 7 8
>>> choices = ['http', 'https'] >>> url = 'http://www.baidu.com' >>> url.startswith(choices) Traceback (most recent call last): File "<stdin>", line 1, in <module> TypeError: startswith first arg must be str or a tuple of str, not list >>> url.startswith(tuple(choices)) True
>>> addresses = [ ... '5412 N CLARK ST', ... '1060 W ADDISON ST', ... '1039 W GRANVILLE AVE', ... '2122 N CLARK ST', ... '4802 N BROADWAY', ... ] >>> >>> [addr for addr in addresses if fnmatch(addr, "* ST")] ['5412 N CLARK ST', '1060 W ADDISON ST', '2122 N CLARK ST'] >>> [addr for addr in addresses if fnmatch(addr, '54[0-9][0-9] *CLARK*')] ['5412 N CLARK ST']
>>> text = 'yeah, but no, but yeah, but no, but yeah' >>> text == 'yeah' False >>> text.startswith('yeah') True >>> text.endswith('no') False >>> text.find('no') 10
>>> text = 'Today is 11/27/2012. PyCon starts 3/13/2013.' >>> datepat.findall(text) ['11/27/2012', '3/13/2013'] >>> for date in datepat.finditer(text): ... print(date.group(0)) ... 11/27/2012 3/13/2013
>>> text = 'Today is 11/27/2012. PyCon starts 3/13/2013.' >>> datepat.findall(text) [('11', '27', '2012'), ('3', '13', '2013')] >>> for m, d, y in datepat.findall(text): ... print('{}-{}-{}'.format(y, m, d)) ... 2012-11-27 2013-3-13
字符串搜索和替换
问题
想在字符串中实现搜索和替换。
解决方案
对于简单的字面模式替换,直接使用 str.replace() 方法即可
对于复杂的模式,则可以使用 re 模块的 sub 函数,它的第一个参数是被匹配的模式,第二个参数则是替换模式。如果使用了命名分组,那么第二个参数可以使用 \g<group_name> 来引用对应的分组。
>>> import re >>> comment = re.compile(r'/\*(.*?)\*/') >>> text1 = '/* this is a comment */' >>> text2 = '''/* this is a ... multiple comment */ ... ''' >>> comment.findall(text1) [' this is a comment '] >>> comment.findall(text2) []
>>> comment = re.compile(r'/\*((?:.|\n)*?)\*/') >>> comment.findall(text1) [' this is a comment '] >>> comment.findall(text2) [' this is a\nmultiple comment ']
>>> comment = re.compile(r'/\*(.*?)\*/', re.DOTALL) >>> comment.findall(text1) [' this is a comment '] >>> comment.findall(text2) [' this is a\nmultiple comment ']
>>> s = 'pýtĥöñ\fis\tawesome\r\n' >>> s 'pýtĥöñ\x0cis\tawesome\r\n' >>> remap = { ... ord('\t'): ' ', ... ord('\f'): ' ', ... ord('\r'): None ... } >>> a = s.translate(remap) >>> a 'pýtĥöñ is awesome\n'
>>> import unicodedata >>> import sys >>> >>> cmb_chrs = dict.fromkeys(c for c in range(sys.maxunicode) if unicodedata.combining(chr(c))) >>> b = unicodedata.normalize("NFD", a) >>> b 'pýtĥöñ is awesome\n' >>> b.translate(cmb_chrs) 'python is awesome\n'
1 2 3 4 5 6 7 8 9 10
>>> digitmap = { c: ord('0') + unicodedata.digit(chr(c)) for c in range(sys.maxunicode) if unicodedata.category(chr(c)) == "Nd" } >>> len(digitmap) 650 >>> x = '\u0661\u0662\u0663' File "<stdin>", line 1 x = '\u0661\u0662\u0663' IndentationError: unexpected indent >>> x = '\u0661\u0662\u0663' >>> x.translate(digitmap) '123'
1 2 3 4 5
>>> a 'pýtĥöñ is awesome\n' >>> b = unicodedata.normalize("NFD", a) >>> b.encode('ascii', 'ignore').decode('ascii') 'python is awesome\n'
>>> name = 'test' >>> n = 10 >>> print(sub('Hello {name}')) Hello test >>> print(sub('You have {n} messages')) You have 10 messages >>> print(sub('Your favorite color is {color}')) Your favorite color is {color}
>>> s = "Look into my eyes, look into my eyes, the eyes, the eyes, \ ... the eyes, not around the eyes, don't look around the eyes, \ ... look into my eyes, you're under." >>> >>> import textwrap >>> print(textwrap.fill(s, 70)) Look into my eyes, look into my eyes, the eyes, the eyes, the eyes, not around the eyes, don't look around the eyes, look into my eyes, you're under.
>>> print(textwrap.fill(s, 40)) Look into my eyes, look into my eyes, the eyes, the eyes, the eyes, not around the eyes, don't look around the eyes, look into my eyes, you're under.
>>> print(textwrap.fill(s, 40, initial_indent=' ')) Look into my eyes, look into my eyes, the eyes, the eyes, the eyes, not around the eyes, don't look around the eyes, look into my eyes, you're under.
>>> print(textwrap.fill(s, 40, subsequent_indent=' ')) Look into my eyes, look into my eyes, the eyes, the eyes, the eyes, not around the eyes, don't look around the eyes, look into my eyes, you're under.
1 2 3 4 5 6 7
>>> import os >>> os.get_terminal_size() os.terminal_size(columns=150, lines=43) >>> os.get_terminal_size().columns 150 >>> print(textwrap.fill(s, os.get_terminal_size().columns)) Look into my eyes, look into my eyes, the eyes, the eyes, the eyes, not around the eyes, don't look around the eyes, look into my eyes, you're under.
如果想将含有编码值的原始文本进行手动替换,只需要使用 HTML 或者 XML 解析器的一些工具函数/方法即可
示例
1 2 3 4 5 6 7 8
>>> s = 'Elements are written as "<tag>text</tag>".' >>> import html >>> print(s) Elements are written as "<tag>text</tag>". >>> print(html.escape(s)) Elements are written as &quot;&lt;tag&gt;text&lt;/tag&gt;&quot;. >>> print(html.escape(s, quote=False)) Elements are written as "&lt;tag&gt;text&lt;/tag&gt;".
1 2 3
>>> s = 'Spicy Jalapeño' >>> s.encode('ascii', errors='xmlcharrefreplace') b'Spicy Jalape&#241;o'
1 2 3 4 5 6
>>> s = 'Spicy &quot;Jalape&#241;o&quot;.' >>> s = 'Spicy Jalapeño' >>> t = 'The prompt is &gt;&gt;&gt;' >>> from xml.sax.saxutils import unescape >>> unescape(t) 'The prompt is >>>'
def generate_tokens(pat, text):
    """Yield a ``Token(type, value)`` for each consecutive match of *pat* in *text*.

    Args:
        pat: A compiled regular expression whose alternatives are named
            groups; the name of the group that matched becomes the token type.
        text: The string to tokenize.

    Yields:
        ``Token`` namedtuples with ``type`` (``m.lastgroup``) and ``value``
        (``m.group()``), in the order they occur in *text*.

    Iteration ends at the first position where *pat* does not match.
    """
    Token = namedtuple("Token", ["type", "value"])
    scanner = pat.scanner(text)
    # Two-argument iter(): keep calling scanner.match() until it returns None.
    for m in iter(scanner.match, None):
        yield Token(m.lastgroup, m.group())
# One named-group pattern per token kind.
NAME = r'(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)'
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
TIMES = r'(?P<TIMES>\*)'
EQ = r'(?P<EQ>\=)'
WS = r'(?P<WS>\s+)'

# Alternatives are tried left to right, so an earlier pattern wins whenever
# more than one could match at the same position.
pat = re.compile('|'.join([NAME, NUM, PLUS, TIMES, EQ, WS]))

# Demo: print every token of a small expression.
text = 'foo = 23 + 42 * 10'
for tok in generate_tokens(pat, text):
    print(tok)
def generate_tokens(text):
    """Yield the tokens of *text*, dropping whitespace tokens.

    Uses the module-level ``master_pat`` pattern and ``Token`` type. Each
    token's type is the name of the group that matched (``m.lastgroup``) and
    its value is the matched text. Iteration ends at the first position where
    ``master_pat`` does not match.
    """
    scanner = master_pat.scanner(text)
    # Two-argument iter(): keep calling scanner.match() until it returns None.
    for m in iter(scanner.match, None):
        tok = Token(m.lastgroup, m.group())
        # Tokens of type 'WS' are filtered out rather than passed on.
        if tok.type != 'WS':
            yield tok
def_accept(self, toktype): if self.nexttok and self.nexttok.type == toktype: self._advance() returnTrue else:
def expr(self):
    """Evaluate ``term { ('+' | '-') term }`` and return the result.

    Reads a first term, then keeps folding further terms into the running
    value for as long as a PLUS or MINUS token can be accepted.
    """
    exprval = self.term()
    while self._accept('PLUS') or self._accept('MINUS'):
        # The operator is read from self.tok right after a successful
        # _accept() call.
        op = self.tok.type
        right = self.term()
        if op == 'PLUS':
            exprval += right
        elif op == 'MINUS':
            exprval -= right
    return exprval
def term(self):
    """Evaluate ``factor { ('*' | '/') factor }`` and return the result.

    Reads a first factor, then keeps folding further factors into the
    running value for as long as a TIMES or DIVIDE token can be accepted.
    Division is true division (``/``), so a quotient is a float.
    """
    termval = self.factor()
    while self._accept('TIMES') or self._accept('DIVIDE'):
        # The operator is read from self.tok right after a successful
        # _accept() call.
        op = self.tok.type
        right = self.factor()
        if op == 'TIMES':
            termval *= right
        elif op == 'DIVIDE':
            # Must test 'DIVIDE', the token accepted by the loop condition;
            # testing 'MINUS' here would silently skip every division.
            termval /= right
    return termval
def factor(self):
    """Evaluate ``NUM | '(' expr ')'`` and return the result.

    Returns:
        The integer value of a NUM token, or the value of the parenthesized
        sub-expression.

    Raises:
        SyntaxError: If the next token is neither NUM nor LPAREN.
    """
    if self._accept('NUM'):
        return int(self.tok.value)
    elif self._accept('LPAREN'):
        exprval = self.expr()
        # The sub-expression must be followed by a closing parenthesis.
        self._expect('RPAREN')
        return exprval
    else:
        raise SyntaxError('Expected NUMBER or LPAREN')