blogit.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. # ============================================================================
  2. # Blogit.py is free software; you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License, version 3
  4. # as published by the Free Software Foundation;
  5. #
  6. # Blogit.py is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with Blogit.py; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  14. # ============================================================================
  15. # Copyright (C) 2013-2016 Oz Nahum Tiram <nahumoz@gmail.com>
  16. # ============================================================================
  17. import os
  18. import re
  19. import datetime
  20. import argparse
  21. import logging
  22. import sys
  23. import operator
  24. from pkg_resources import (Requirement, resource_filename, get_distribution,
  25. DistributionNotFound)
  26. from distutils.dir_util import copy_tree
  27. from collections import namedtuple
  28. import codecs
  29. import http.server
  30. import subprocess as sp
  31. import socketserver
  32. from jinja2 import Environment, FileSystemLoader, Markup
  33. import markdown2
  34. import markdown2 as md2
  35. import tinydb
  36. from tinydb import Query
# Resolve the version of the installed ``blogit`` distribution; use a
# hard-coded value when no such distribution is found.
try:
    __version__ = get_distribution('blogit').version
except DistributionNotFound:  # pragma: no cover
    __version__ = '0.2'

# Module logger: the logger itself accepts DEBUG and above, while the stream
# handler attached to it only emits INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
logger.addHandler(ch)

# Put the current working directory first on the import path; the ``conf``
# module is imported right after this.
sys.path.insert(0, os.getcwd())
  47. # before quickstart was run, there is no conf...
  48. try:
  49. from conf import CONFIG, GLOBAL_TEMPLATE_CONTEXT
  50. jinja_env = Environment(lstrip_blocks=True, trim_blocks=True,
  51. loader=FileSystemLoader(CONFIG['templates']))
  52. jinja_env.filters['markdown'] = lambda text: Markup(md2.markdown(
  53. text, extras=['fenced-code-blocks', 'hilite', 'tables', 'metadata']))
  54. class DataBase(object): # pragma: no coverage
  55. """A thin wrapper around TinyDB instance"""
  56. def __init__(self, path):
  57. self._db = tinydb.TinyDB(path)
  58. self.posts = self._db.table('posts')
  59. self.tags = self._db.table('tags')
  60. self.pages = self._db.table('pages')
  61. self.templates = self._db.table('templates')
  62. def __getitem__(self, key):
  63. return self._db.table(key)
  64. # this won't work when installing - content root does not exist
  65. DB = DataBase(os.path.join(CONFIG['content_root'], 'blogit.db'))
  66. except (ImportError, OSError): # pragma: no coverage
  67. cwd = os.getcwd()
  68. CONFIG = {'output_to': cwd, 'content_root': os.path.join(cwd, 'content')}
  69. DataBaseDummy = namedtuple('DataBaseDummy', ['path', 'tags'])
  70. DB = DataBaseDummy('dummy', 'tags')
  71. # with this config, pages are rendered to the location of their title
  72. KINDS = {'writing': {'name': 'writing', 'name_plural': 'writings', }, }
  73. class Tag(object):
  74. table = DB.tags
  75. db = DB
  76. def __init__(self, name):
  77. self .name = name
  78. self.permalink = GLOBAL_TEMPLATE_CONTEXT["site_url"]
  79. Tags = Query()
  80. tag = self.table.get(Tags.name == self.name)
  81. if not tag:
  82. self.table.insert({'name': self.name, 'post_ids': []})
  83. def __str__(self):
  84. return self.name
  85. def __repr__(self): # pragma: no coverage
  86. return self.name
  87. @property
  88. def slug(self):
  89. _slug = self.name.lower()
  90. _slug = re.sub(r'[;:,. ]+', '-', _slug.lstrip(',.;:-'))
  91. return _slug.lstrip('-')
  92. @property
  93. def posts(self):
  94. """return a listpost ids tagged with Tag"""
  95. Tags = Query()
  96. tag = self.table.get(Tags.name == self.name)
  97. return tag['post_ids']
  98. @posts.setter
  99. def posts(self, post_ids):
  100. if not isinstance(post_ids, list):
  101. raise ValueError("post_ids must be of type list")
  102. Tags = Query()
  103. tag = self.table.get(Tags.name == self.name)
  104. new = set(post_ids) - set(tag['post_ids'])
  105. tag['post_ids'].extend(list(new))
  106. self.table.update({'post_ids': tag['post_ids']}, eids=[tag.eid])
  107. @property
  108. def entries(self):
  109. """return the actual lists of entries tagged with"""
  110. for id in self.posts:
  111. post = self.db.posts.get(eid=id)
  112. if not post: # pragma: no coverage
  113. raise ValueError("No post found for eid %s" % id)
  114. yield Entry(os.path.join(CONFIG['content_root'], post['filename']), id) # noqa
  115. def render(self):
  116. """Render html page and atom feed"""
  117. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  118. context['tag'] = self
  119. entries = list(self.entries)
  120. entries.sort(key=operator.attrgetter('date'), reverse=True)
  121. context['entries'] = entries
  122. # render html page
  123. render_to = os.path.join(CONFIG['output_to'], 'tags', self.slug)
  124. if not os.path.exists(render_to): # pragma: no coverage
  125. os.makedirs(render_to)
  126. _render(context, 'tag_index.html', os.path.join(render_to, 'index.html')) # noqa
  127. # render atom.xml
  128. context['entries'] = context['entries'][:10]
  129. context['last_build'] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") # noqa
  130. _render(context, 'atom.xml', os.path.join(render_to, 'atom.xml'))
  131. return True
  132. class Entry(object):
  133. """This is the base class for creating an HTML page from a Markdown
  134. based page.
  135. The file has the following structure for a page:
  136. .. code:
  137. ---
  138. title: example page
  139. public: yes
  140. kind: page
  141. template: about.html
  142. ---
  143. # some heading
  144. content paragraph
  145. ## heading 2
  146. some more content
  147. The file has the following structure for a blog entry:
  148. .. code:
  149. ---
  150. title: Blog post 1
  151. author: Famous author
  152. published: 2015-01-11
  153. tags: python, git, bash, linux
  154. public: yes
  155. chronological: yes
  156. kind: writing
  157. summary: This is a summry of post 1. Donec id elit non mi porta
  158. ---
  159. This is the body of post 1. Donec id elit non mi porta gravida
  160. """
  161. db = DB
  162. @classmethod
  163. def entry_from_db(kls, filename, eid=None):
  164. f = os.path.join(filename)
  165. return kls(f, eid)
  166. def __init__(self, path, eid=None):
  167. self._path = path
  168. self.path = path.split(CONFIG['content_root'])[-1].lstrip('/')
  169. self.id = eid # this is set inside prepare()
  170. try:
  171. self.prepare()
  172. except KeyError: # pragma: no coverage
  173. pass
  174. def __str__(self):
  175. return self.path
  176. def __repr__(self): # pragma: no coverage
  177. return self.path
  178. @property
  179. def name(self):
  180. return os.path.splitext(os.path.basename(self.path))[0]
  181. @property
  182. def abspath(self):
  183. return self._path
  184. @property
  185. def destination(self):
  186. return os.path.join(CONFIG['output_to'], self.permalink)
  187. @property
  188. def title(self):
  189. return self.header['title']
  190. @property
  191. def publish_date(self):
  192. try:
  193. r = datetime.datetime.strptime(self.header.get('published', ''),
  194. "%Y-%m-%d")
  195. except ValueError: # pragma: no coverage
  196. r = datetime.date.today()
  197. return r
  198. @property
  199. def permalink(self):
  200. if self.kind == 'page':
  201. dest = '%s.html' % self._path.replace('.md', "")
  202. else:
  203. dest = "%s/%s/index.html" % (KINDS[self.kind]['name_plural'],
  204. self.name)
  205. dest = dest.lstrip('/')
  206. return dest
  207. @property
  208. def tags(self):
  209. """this property is always called after prepare"""
  210. if 'tags' in self.header:
  211. tags = [Tag(t) for t in self.header['tags']]
  212. list(map(lambda t: setattr(t, 'posts', [self.id]), tags))
  213. return tags
  214. else:
  215. return []
  216. def prepare(self):
  217. self.body_html = markdown2.markdown(
  218. codecs.open(self.abspath, 'r').read(),
  219. extras=['fenced-code-blocks', 'hilite', 'tables', 'metadata'])
  220. self.header = self.body_html.metadata
  221. if 'tags' in self.header: # pages can lack tags
  222. self.header['tags'] = [t.strip().lower() for t in
  223. self.header['tags'].split(',')]
  224. self.date = self.header.get('published', datetime.datetime.now())
  225. if isinstance(self.date, str):
  226. self.date = datetime.datetime.strptime(self.date, "%Y-%m-%d")
  227. for k, v in self.header.items():
  228. try:
  229. setattr(self, k, v)
  230. except AttributeError:
  231. pass
  232. if self.id:
  233. return
  234. rec = {'filename': self.path,
  235. 'mtime': int(os.path.getmtime(self.abspath))}
  236. if self.header['kind'] == 'writing':
  237. _id = Entry.db.posts.insert(rec)
  238. elif self.header['kind'] == 'page':
  239. _id = Entry.db.pages.insert(rec)
  240. self.id = _id
  241. def render(self):
  242. try:
  243. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  244. context['entry'] = self
  245. _render(context, self.header.get('template', 'entry.html'),
  246. self.header.get('template', self.destination))
  247. return True
  248. except Exception: # pragma: no cover
  249. logger.exception("Found some problem with %s", self.path)
  250. sys.exit(1)
  251. def _render(context, template_path, output_path, encoding='utf-8'):
  252. template = jinja_env.get_template(template_path)
  253. html = template.render(context)
  254. try:
  255. os.makedirs(os.path.dirname(output_path))
  256. except OSError:
  257. pass
  258. destination = codecs.open(output_path, 'w', encoding)
  259. destination.write(html)
  260. destination.close()
  261. def render_archive(entries):
  262. """Creates the archive page"""
  263. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  264. context['entries'] = entries
  265. _render(context, 'archive_index.html',
  266. os.path.join(CONFIG['output_to'], 'archive/index.html')),
  267. def find_new_posts_and_pages(db):
  268. """Walk content dir, put each post and page in the database"""
  269. Q = Query()
  270. for root, dirs, files in os.walk(CONFIG['content_root']):
  271. for filename in sorted([f for f in files if
  272. f.endswith(('md', 'markdown'))]):
  273. fullpath = os.path.join(root, filename)
  274. _p = fullpath.split(CONFIG['content_root'])[-1].lstrip('/')
  275. new_mtime = int(os.path.getmtime(fullpath))
  276. e, item = None, None
  277. for collection in ['posts', 'pages']:
  278. item = db[collection].get(Q.filename == _p)
  279. if item:
  280. if new_mtime > item['mtime']:
  281. db[collection].update({'mtime': new_mtime},
  282. eids=[item.eid])
  283. e = Entry(fullpath, eid=item.eid)
  284. break
  285. if not item:
  286. e = Entry(fullpath)
  287. if e:
  288. yield e, e.id
  289. def _get_last_entries(db, qty):
  290. """get all entries and the last qty entries"""
  291. eids = [post.eid for post in db.posts.all()]
  292. eids = sorted(eids, reverse=True)
  293. entries = [Entry(os.path.join(CONFIG['content_root'],
  294. db.posts.get(eid=eid)['filename']), eid) for eid in eids]
  295. # return _sort_entries(entries)[:qty]
  296. entries.sort(key=operator.attrgetter('date'), reverse=True)
  297. return entries[:qty], entries
  298. def update_index(entries):
  299. """find the last 10 entries in the database and create the main
  300. page.
  301. Each entry in has an eid, so we only get the last 10 eids.
  302. This method also updates the ATOM feed.
  303. """
  304. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  305. context['entries'] = entries
  306. context['last_build'] = datetime.datetime.now().strftime(
  307. "%Y-%m-%dT%H:%M:%SZ")
  308. list(map(lambda x: _render(context, x[0],
  309. os.path.join(CONFIG['output_to'], x[1])),
  310. (('entry_index.html', 'index.html'), ('atom.xml', 'atom.xml'))))
  311. def _filter_none_public(entries):
  312. for e in entries:
  313. if e.header.get('public').lower() in ('true', 'yes'):
  314. yield e
  315. def build(config):
  316. """Incremental build of the website"""
  317. logger.info("\nRendering website now...\n")
  318. logger.info("entries:")
  319. tags = dict()
  320. entries = list()
  321. for post, post_id in find_new_posts_and_pages(DB):
  322. # this method will also parse the post's tags and
  323. # update the db collection containing the tags.
  324. if post.render():
  325. if post.header['kind'] in ['writing', 'link']:
  326. for tag in post.tags:
  327. tag.posts = [post_id]
  328. tags[tag.name] = tag
  329. entries.append(post)
  330. logger.info("%s" % post.path)
  331. for name, to in tags.items():
  332. logger.info("updating tag %s" % name)
  333. to.render()
  334. # This is expensive, we should insert only the recent entries
  335. # to the index using BeautifulSoup
  336. # update index
  337. logger.info("Updating index")
  338. last_entries, all_entries = _get_last_entries(DB, config['INDEX_SIZE'])
  339. last_entries = list(_filter_none_public(last_entries))
  340. update_index(last_entries)
  341. # update archive
  342. logger.info("Updating archive")
  343. # This is expensive, we should insert only the recent entries
  344. # to the archive using BeautifulSoup
  345. entries = [Entry.entry_from_db(
  346. os.path.join(CONFIG['content_root'],
  347. e.get('filename')), e.eid) for e in
  348. DB.posts.all()]
  349. all_entries = list(_filter_none_public(all_entries))
  350. all_entries.sort(key=operator.attrgetter('date'), reverse=True)
  351. render_archive(all_entries[config['ARCHIVE_SIZE']:])
  352. def preview(): # pragma: no coverage
  353. """launch an HTTP to preview the website"""
  354. Handler = http.server.SimpleHTTPRequestHandler
  355. socketserver.TCPServer.allow_reuse_address = True
  356. port = CONFIG['http_port']
  357. httpd = socketserver.TCPServer(("", port), Handler)
  358. os.chdir(CONFIG['output_to'])
  359. try:
  360. logger.info("and ready to test at "
  361. "http://127.0.0.1:%d" % CONFIG['http_port'])
  362. logger.info("Hit Ctrl+C to exit")
  363. httpd.serve_forever()
  364. except KeyboardInterrupt:
  365. httpd.shutdown()
  366. def quick_start(): # pragma: no coverage
  367. path = resource_filename(Requirement.parse("blogit"), 'blogit/blogit-mir')
  368. copy_tree(path, '.')
  369. def publish(GITDIRECTORY=CONFIG['output_to']): # pragma: no coverage
  370. sp.call('git push', cwd=GITDIRECTORY, shell=True)
  371. def new_post(GITDIRECTORY=CONFIG['output_to'], kind=KINDS['writing']): # pragma: no coverage # noqa
  372. """
  373. This function should create a template for a new post with a title
  374. read from the user input.
  375. Most other fields should be defaults.
  376. TODO: update this function
  377. """
  378. title = input("Give the title of the post: ")
  379. while ':' in title:
  380. title = input("Give the title of the post (':' not allowed): ")
  381. author = CONFIG['author']
  382. date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
  383. tags = input("Give the tags, separated by ', ':")
  384. published = 'yes'
  385. chronological = 'yes'
  386. summary = ("summary: Type your summary here.")
  387. # make file name
  388. fname = os.path.join(os.getcwd(), 'content', kind['name_plural'],
  389. datetime.datetime.strftime(datetime.datetime.now(),
  390. '%Y'),
  391. date + '-' + title.replace(' ', '-') + '.markdown')
  392. with open(fname, 'w') as npost:
  393. npost.write('---\n')
  394. npost.write('title: %s\n' % title)
  395. npost.write('author: %s\n' % author)
  396. npost.write('published: %s\n' % date)
  397. npost.write('tags: %s\n' % tags)
  398. npost.write('public: %s\n' % published)
  399. npost.write('chronological: %s\n' % chronological)
  400. npost.write('kind: %s\n' % kind['name'])
  401. npost.write('%s\n' % summary)
  402. npost.write('---\n')
  403. os.system('%s %s' % (CONFIG['editor'], fname))
  404. def get_parser(formatter_class=argparse.HelpFormatter): # pragma: no coverage
  405. parser = argparse.ArgumentParser(
  406. prog='blogit',
  407. description='blogit - a simple static site generator.',
  408. formatter_class=formatter_class)
  409. parser.add_argument('-b', '--build', action="store_true",
  410. help='convert the markdown files to HTML')
  411. parser.add_argument('-p', '--preview', action="store_true",
  412. help='Launch HTTP server to preview the website')
  413. parser.add_argument('-c', '--clean', action="store_true",
  414. help='clean output files')
  415. parser.add_argument('-n', '--new', action="store_true",
  416. help='create new post')
  417. parser.add_argument('--publish', action="store_true",
  418. help='push built HTML to git upstream')
  419. parser.add_argument('--quick-start', action="store_true")
  420. parser.add_argument('--version', action="store_true")
  421. return parser
  422. def main(): # pragma: no coverage
  423. parser = get_parser()
  424. args = parser.parse_args()
  425. if len(sys.argv) < 2:
  426. parser.print_help()
  427. sys.exit()
  428. if args.version:
  429. print("This is blogit {}. Copyright Oz N Tiram "
  430. "<oz.tiram@gmail.com>".format(__version__))
  431. if args.build:
  432. build(CONFIG)
  433. if args.preview:
  434. preview()
  435. if args.new:
  436. new_post()
  437. if args.publish:
  438. publish()
  439. if args.quick_start:
  440. quick_start()
  441. if __name__ == '__main__': # pragma: no coverage
  442. main()