# blogit.py
  1. # ============================================================================
  2. # Blogit.py is free software; you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License, version 3
  4. # as published by the Free Software Foundation;
  5. #
  6. # Blogit.py is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with Blogit.py; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  14. # ============================================================================
  15. # Copyright (C) 2013-2016 Oz Nahum Tiram <nahumoz@gmail.com>
  16. # ============================================================================
  17. import os
  18. import re
  19. import datetime
  20. import argparse
  21. import logging
  22. import sys
  23. import operator
  24. from pkg_resources import (Requirement, resource_filename, get_distribution,
  25. DistributionNotFound)
  26. from distutils.dir_util import copy_tree
  27. from collections import namedtuple
  28. import codecs
  29. import http.server
  30. import subprocess as sp
  31. import socketserver
  32. from jinja2 import Environment, FileSystemLoader, Markup
  33. from markdown2 import Markdown
  34. import tinydb
  35. from tinydb import Query
  36. try:
  37. __version__ = get_distribution('blogit').version
  38. except DistributionNotFound: # pragma: no cover
  39. __version__ = '0.3'
  40. def markdown(text, html4tags=False, tab_width=4,
  41. safe_mode=None, extras=None, link_patterns=None,
  42. use_file_vars=False):
  43. return Markdown(html4tags=html4tags, tab_width=tab_width,
  44. safe_mode=safe_mode, extras=extras,
  45. link_patterns=link_patterns,
  46. use_file_vars=use_file_vars).convert(text)
  47. logger = logging.getLogger(__name__)
  48. logger.setLevel(logging.DEBUG)
  49. ch = logging.StreamHandler()
  50. ch.setLevel(logging.INFO)
  51. logger.addHandler(ch)
  52. sys.path.insert(0, os.getcwd())
  53. # before quickstart was run, there is no conf...
  54. try:
  55. from conf import CONFIG, GLOBAL_TEMPLATE_CONTEXT
  56. jinja_env = Environment(lstrip_blocks=True, trim_blocks=True,
  57. loader=FileSystemLoader(CONFIG['templates']))
  58. def s2md(text):
  59. return Markup(markdown(text,
  60. extras=['fenced-code-blocks',
  61. 'hilite', 'tables']))
  62. jinja_env.filters['markdown'] = s2md
  63. class DataBase(object): # pragma: no coverage
  64. """A thin wrapper around TinyDB instance"""
  65. def __init__(self, path):
  66. self._db = tinydb.TinyDB(path)
  67. self.posts = self._db.table('posts')
  68. self.tags = self._db.table('tags')
  69. self.pages = self._db.table('pages')
  70. self.templates = self._db.table('templates')
  71. def __getitem__(self, key):
  72. return self._db.table(key)
  73. # this won't work when installing - content root does not exist
  74. DB = DataBase(os.path.join(CONFIG['content_root'], 'blogit.db'))
  75. except (ImportError, OSError): # pragma: no coverage
  76. cwd = os.getcwd()
  77. CONFIG = {'output_to': cwd, 'content_root': os.path.join(cwd, 'content')}
  78. DataBaseDummy = namedtuple('DataBaseDummy', ['path', 'tags'])
  79. DB = DataBaseDummy('dummy', 'tags')
  80. # with this config, pages are rendered to the location of their title
  81. KINDS = {'writing': {'name': 'writing', 'name_plural': 'writings', }, }
  82. class Tag(object):
  83. table = DB.tags
  84. db = DB
  85. def __init__(self, name):
  86. self .name = name
  87. self.permalink = GLOBAL_TEMPLATE_CONTEXT["site_url"]
  88. Tags = Query()
  89. tag = self.table.get(Tags.name == self.name)
  90. if not tag:
  91. self.table.insert({'name': self.name, 'post_ids': []})
  92. def __str__(self):
  93. return self.name
  94. def __repr__(self): # pragma: no coverage
  95. return self.name
  96. @property
  97. def slug(self):
  98. _slug = self.name.lower()
  99. _slug = re.sub(r'[;:,. ]+', '-', _slug.lstrip(',.;:-'))
  100. return _slug.lstrip('-')
  101. def set_posts(self, post_ids):
  102. if not isinstance(post_ids, list):
  103. raise ValueError("post_ids must be of type list")
  104. Tags = Query()
  105. tag = self.table.get(Tags.name == self.name)
  106. new = set(post_ids) - set(tag['post_ids'])
  107. tag['post_ids'].extend(list(new))
  108. self.table.update({'post_ids': tag['post_ids']}, doc_ids=[tag.doc_id])
  109. posts = property(fget=None, fset=set_posts)
  110. @property
  111. def entries(self):
  112. """return the actual lists of entries tagged with"""
  113. Tags = Query()
  114. tag = self.table.get(Tags.name == self.name)
  115. posts = tag['post_ids']
  116. for id in posts:
  117. post = self.db.posts.get(doc_id=id)
  118. if not post: # pragma: no coverage
  119. raise ValueError("No post found for doc_id %s" % id)
  120. yield Entry(os.path.join(CONFIG['content_root'], post['filename']), id) # noqa
  121. def render(self):
  122. """Render html page and atom feed"""
  123. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  124. context['tag'] = self
  125. entries = list(self.entries)
  126. entries.sort(key=operator.attrgetter('date'), reverse=True)
  127. context['entries'] = entries
  128. # render html page
  129. render_to = os.path.join(CONFIG['output_to'], 'tags', self.slug)
  130. if not os.path.exists(render_to): # pragma: no coverage
  131. os.makedirs(render_to)
  132. _render(context, 'tag_index.html', os.path.join(render_to, 'index.html')) # noqa
  133. # render atom.xml
  134. context['entries'] = context['entries'][:10]
  135. context['last_build'] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") # noqa
  136. _render(context, 'atom.xml', os.path.join(render_to, 'atom.xml'))
  137. return True
  138. class Entry(object):
  139. """This is the base class for creating an HTML page from a Markdown
  140. based page.
  141. The file has the following structure for a page:
  142. .. code:
  143. ---
  144. title: example page
  145. public: yes
  146. kind: page
  147. template: about.html
  148. ---
  149. # some heading
  150. content paragraph
  151. ## heading 2
  152. some more content
  153. The file has the following structure for a blog entry:
  154. .. code:
  155. ---
  156. title: Blog post 1
  157. author: Famous author
  158. published: 2015-01-11
  159. tags: python, git, bash, linux
  160. public: yes
  161. chronological: yes
  162. kind: writing
  163. summary: This is a summry of post 1. Donec id elit non mi porta
  164. ---
  165. This is the body of post 1. Donec id elit non mi porta gravida
  166. """
  167. db = DB
  168. @classmethod
  169. def entry_from_db(kls, filename, doc_id=None):
  170. f = os.path.join(filename)
  171. return kls(f, doc_id)
  172. def __init__(self, path, doc_id=None):
  173. self._path = path
  174. self.path = path.split(CONFIG['content_root'])[-1].lstrip('/')
  175. self.id = doc_id # this is set inside prepare()
  176. try:
  177. self.prepare()
  178. except KeyError: # pragma: no coverage
  179. pass
  180. def __str__(self):
  181. return self.path
  182. def __repr__(self): # pragma: no coverage
  183. return self.path
  184. @property
  185. def name(self):
  186. return os.path.splitext(os.path.basename(self.path))[0]
  187. @property
  188. def abspath(self):
  189. return self._path
  190. @property
  191. def destination(self):
  192. return os.path.join(CONFIG['output_to'], self.permalink)
  193. @property
  194. def title(self):
  195. return self.header['title']
  196. @property
  197. def publish_date(self):
  198. try:
  199. r = datetime.datetime.strptime(self.header.get('published', ''),
  200. "%Y-%m-%d")
  201. except ValueError: # pragma: no coverage
  202. r = datetime.date.today()
  203. return r
  204. @property
  205. def permalink(self):
  206. if self.kind == 'page':
  207. dest = '%s.html' % self._path.replace('.md', "")
  208. else:
  209. dest = "%s/%s/index.html" % (KINDS[self.kind]['name_plural'],
  210. self.name)
  211. dest = dest.lstrip('/')
  212. return dest
  213. @property
  214. def tags(self):
  215. """this property is always called after prepare"""
  216. if 'tags' in self.header:
  217. tags = [Tag(t) for t in self.header['tags']]
  218. list(map(lambda t: setattr(t, 'posts', [self.id]), tags))
  219. return tags
  220. else:
  221. return []
  222. def prepare(self):
  223. self.body_html = markdown(
  224. codecs.open(self.abspath, 'r').read(),
  225. extras=['fenced-code-blocks', 'hilite', 'tables', 'metadata'])
  226. self.header = self.body_html.metadata
  227. """a blog post without tags causes an error ..."""
  228. if 'tags' in self.header: # pages can lack tags
  229. self.header['tags'] = [t.strip().lower() for t in
  230. self.header['tags'].split(',')]
  231. else:
  232. self.header['tags'] = ("",)
  233. self.date = self.header.get('published', datetime.datetime.now())
  234. if isinstance(self.date, str):
  235. self.date = datetime.datetime.strptime(self.date, "%Y-%m-%d")
  236. for k, v in self.header.items():
  237. try:
  238. setattr(self, k, v)
  239. except AttributeError:
  240. pass
  241. if self.id:
  242. return
  243. rec = {'filename': self.path,
  244. 'mtime': int(os.path.getmtime(self.abspath))}
  245. if self.header['kind'] == 'writing':
  246. _id = Entry.db.posts.insert(rec)
  247. elif self.header['kind'] == 'page':
  248. _id = Entry.db.pages.insert(rec)
  249. self.id = _id
  250. def render(self):
  251. try:
  252. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  253. context['entry'] = self
  254. _render(context, self.header.get('template', 'entry.html'),
  255. self.header.get('template', self.destination))
  256. return True
  257. except Exception: # pragma: no cover
  258. logger.exception("Found some problem with %s", self.path)
  259. sys.exit(1)
  260. def _render(context, template_path, output_path, encoding='utf-8'):
  261. template = jinja_env.get_template(template_path)
  262. html = template.render(context)
  263. try:
  264. os.makedirs(os.path.dirname(output_path))
  265. except OSError:
  266. pass
  267. destination = codecs.open(output_path, 'w', encoding)
  268. destination.write(html)
  269. destination.close()
  270. def render_archive(entries):
  271. """Creates the archive page"""
  272. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  273. context['entries'] = entries
  274. _render(context, 'archive_index.html',
  275. os.path.join(CONFIG['output_to'], 'archive/index.html')),
  276. def find_new_posts_and_pages(db):
  277. """Walk content dir, put each post and page in the database"""
  278. Q = Query()
  279. for root, dirs, files in os.walk(CONFIG['content_root']):
  280. for filename in sorted([f for f in files if
  281. f.endswith(('md', 'markdown'))]):
  282. fullpath = os.path.join(root, filename)
  283. _p = fullpath.split(CONFIG['content_root'])[-1].lstrip('/')
  284. new_mtime = int(os.path.getmtime(fullpath))
  285. e, item = None, None
  286. for collection in ['posts', 'pages']:
  287. item = db[collection].get(Q.filename == _p)
  288. if item:
  289. if new_mtime > item['mtime']:
  290. db[collection].update({'mtime': new_mtime},
  291. doc_ids=[item.doc_id])
  292. e = Entry(fullpath, doc_id=item.doc_id)
  293. break
  294. if not item:
  295. e = Entry(fullpath)
  296. if e:
  297. yield e, e.id
  298. def _get_last_entries(db, qty):
  299. """get all entries and the last qty entries"""
  300. doc_ids = [post.doc_id for post in db.posts.all()]
  301. doc_ids = sorted(doc_ids, reverse=True)
  302. # bug: here we shoud only render doc_ids[:qty]
  303. # but we can't use mtimes for sorting. We'll need to add ptime for the
  304. # database (publish time)
  305. entries = [Entry(os.path.join(CONFIG['content_root'],
  306. db.posts.get(doc_id=doc_id)['filename']), doc_id)
  307. for doc_id in doc_ids]
  308. # return _sort_entries(entries)[:qty]
  309. entries.sort(key=operator.attrgetter('date'), reverse=True)
  310. return entries[:qty], entries
  311. def update_index(entries):
  312. """find the last 10 entries in the database and create the main
  313. page.
  314. Each entry in has an doc_id, so we only get the last 10 doc_ids.
  315. This method also updates the ATOM feed.
  316. """
  317. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  318. context['entries'] = entries
  319. context['last_build'] = datetime.datetime.now().strftime(
  320. "%Y-%m-%dT%H:%M:%SZ")
  321. list(map(lambda x: _render(context, x[0],
  322. os.path.join(CONFIG['output_to'], x[1])),
  323. (('entry_index.html', 'index.html'), ('atom.xml', 'atom.xml'))))
  324. def _filter_none_public(entries):
  325. """by default entries are public, but one can hide them"""
  326. for e in entries:
  327. if e.header.get('public', 'yes').lower() in ('true', 'yes'):
  328. yield e
  329. def build(config):
  330. """Incremental build of the website"""
  331. logger.info("\nRendering website now...\n")
  332. logger.info("entries:")
  333. tags = dict()
  334. entries = list()
  335. for post, post_id in find_new_posts_and_pages(DB):
  336. # this method will also parse the post's tags and
  337. # update the db collection containing the tags.
  338. if post.render():
  339. if post.header['kind'] in ['writing', 'link']:
  340. for tag in post.tags:
  341. tag.posts = [post_id]
  342. tags[tag.name] = tag
  343. entries.append(post)
  344. logger.info("%s" % post.path)
  345. for name, to in tags.items():
  346. logger.info("updating tag %s" % name)
  347. to.render()
  348. # This is expensive, we should insert only the recent entries
  349. # to the index using BeautifulSoup
  350. # update index
  351. logger.info("Updating index")
  352. last_entries, all_entries = _get_last_entries(DB, config['INDEX_SIZE'])
  353. last_entries = list(_filter_none_public(last_entries))
  354. update_index(last_entries)
  355. # update archive
  356. logger.info("Updating archive")
  357. # This is expensive, we should insert only the recent entries
  358. # to the archive using BeautifulSoup
  359. entries = [Entry.entry_from_db(
  360. os.path.join(CONFIG['content_root'],
  361. e.get('filename')), e.doc_id) for e in
  362. DB.posts.all()]
  363. all_entries = list(_filter_none_public(all_entries))
  364. all_entries.sort(key=operator.attrgetter('date'), reverse=True)
  365. render_archive(all_entries[config['ARCHIVE_SIZE']:])
  366. def preview(): # pragma: no coverage
  367. """launch an HTTP to preview the website"""
  368. Handler = http.server.SimpleHTTPRequestHandler
  369. socketserver.TCPServer.allow_reuse_address = True
  370. port = CONFIG['http_port']
  371. httpd = socketserver.TCPServer(("", port), Handler)
  372. os.chdir(CONFIG['output_to'])
  373. try:
  374. logger.info("and ready to test at "
  375. "http://127.0.0.1:%d" % CONFIG['http_port'])
  376. logger.info("Hit Ctrl+C to exit")
  377. httpd.serve_forever()
  378. except KeyboardInterrupt:
  379. httpd.shutdown()
  380. def quick_start(): # pragma: no coverage
  381. if getattr(sys, 'frozen', False):
  382. path = os.path.join(sys._MEIPASS, 'blogit-mir')
  383. else:
  384. path = resource_filename(Requirement.parse("blogit"),
  385. 'blogit/blogit-mir')
  386. copy_tree(path, '.')
  387. print("edit conf.py, create some pages and posts and run blogit --preview")
  388. def publish(GITDIRECTORY=CONFIG['output_to']): # pragma: no coverage
  389. sp.call('git push', cwd=GITDIRECTORY, shell=True)
  390. def new_post(GITDIRECTORY=CONFIG['output_to'], kind=KINDS['writing']): # pragma: no coverage # noqa
  391. """
  392. This function should create a template for a new post with a title
  393. read from the user input.
  394. Most other fields should be defaults.
  395. TODO: update this function
  396. """
  397. title = input("Give the title of the post: ")
  398. while ':' in title:
  399. title = input("Give the title of the post (':' not allowed): ")
  400. author = CONFIG['author']
  401. date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
  402. tags = input("Give the tags, separated by ', ':")
  403. published = 'yes'
  404. chronological = 'yes'
  405. summary = ("summary: Type your summary here.")
  406. # make file name
  407. fname = os.path.join(os.getcwd(), 'content', kind['name_plural'],
  408. datetime.datetime.strftime(datetime.datetime.now(),
  409. '%Y'),
  410. date + '-' + title.replace(' ', '-') + '.markdown')
  411. # first post every year need to create a new directory
  412. if not os.path.exists(os.path.dirname(fname)):
  413. os.makedirs(os.path.dirname(fname))
  414. with open(fname, 'w') as npost:
  415. npost.write('---\n')
  416. npost.write('title: %s\n' % title)
  417. npost.write('author: %s\n' % author)
  418. npost.write('published: %s\n' % date)
  419. npost.write('tags: %s\n' % tags)
  420. npost.write('public: %s\n' % published)
  421. npost.write('chronological: %s\n' % chronological)
  422. npost.write('kind: %s\n' % kind['name'])
  423. npost.write('%s\n' % summary)
  424. npost.write('---\n')
  425. os.system('%s %s' % (CONFIG['editor'], fname))
  426. def get_parser(formatter_class=argparse.HelpFormatter): # pragma: no coverage
  427. parser = argparse.ArgumentParser(
  428. prog='blogit',
  429. description='blogit - a simple static site generator.',
  430. formatter_class=formatter_class)
  431. parser.add_argument('-b', '--build', action="store_true",
  432. help='convert the markdown files to HTML')
  433. parser.add_argument('-p', '--preview', action="store_true",
  434. help='Launch HTTP server to preview the website')
  435. parser.add_argument('-c', '--clean', action="store_true",
  436. help='clean output files')
  437. parser.add_argument('-n', '--new', action="store_true",
  438. help='create new post')
  439. parser.add_argument('--publish', action="store_true",
  440. help='push built HTML to git upstream')
  441. parser.add_argument('--quick-start', action="store_true")
  442. parser.add_argument('--version', action="store_true")
  443. return parser
  444. def main(): # pragma: no coverage
  445. parser = get_parser()
  446. args = parser.parse_args()
  447. if len(sys.argv) < 2:
  448. parser.print_help()
  449. sys.exit()
  450. if args.version:
  451. print("This is blogit {}. Copyright Oz N Tiram "
  452. "<oz.tiram@gmail.com>".format(__version__))
  453. if args.build:
  454. build(CONFIG)
  455. if args.preview:
  456. preview()
  457. if args.new:
  458. new_post()
  459. if args.publish:
  460. publish()
  461. if args.quick_start:
  462. quick_start()
  463. if __name__ == '__main__': # pragma: no coverage
  464. main()