blogit.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. # ============================================================================
  2. # Blogit.py is free software; you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License, version 3
  4. # as published by the Free Software Foundation;
  5. #
  6. # Blogit.py is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with Blogit.py; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  14. # ============================================================================
  15. # Copyright (C) 2013-2016 Oz Nahum Tiram <nahumoz@gmail.com>
  16. # ============================================================================
  17. import os
  18. import re
  19. import datetime
  20. import argparse
  21. import logging
  22. import sys
  23. import operator
  24. from pkg_resources import Requirement, resource_filename
  25. from distutils.dir_util import copy_tree
  26. from collections import namedtuple
  27. import shutil
  28. from io import StringIO
  29. import codecs
  30. import http.server
  31. import subprocess as sp
  32. import socket
  33. import socketserver
  34. from jinja2 import Environment, FileSystemLoader
  35. import markdown2
  36. import tinydb
  37. from tinydb import Query, where
  38. logger = logging.getLogger(__name__)
  39. logger.setLevel(logging.DEBUG)
  40. ch = logging.StreamHandler()
  41. ch.setLevel(logging.INFO)
  42. logger.addHandler(ch)
  43. sys.path.insert(0, os.getcwd())
  44. # before quickstart was run, there is no conf...
  45. try:
  46. from conf import CONFIG, GLOBAL_TEMPLATE_CONTEXT
  47. jinja_env = Environment(lstrip_blocks=True, trim_blocks=True,
  48. loader=FileSystemLoader(CONFIG['templates']))
  49. class DataBase(object): # pragma: no coverage
  50. """A thin wrapper around TinyDB instance"""
  51. def __init__(self, path):
  52. _db = tinydb.TinyDB(path)
  53. self.posts = _db.table('posts')
  54. self.tags = _db.table('tags')
  55. self.pages = _db.table('pages')
  56. self.templates = _db.table('templates')
  57. self._db = _db
  58. DB = DataBase(os.path.join(CONFIG['content_root'], 'blogit.db'))
  59. except ImportError:
  60. cwd = os.getcwd()
  61. CONFIG = {'output_to': cwd, 'content_root': os.path.join(cwd, 'content')}
  62. DataBaseDummy = namedtuple('DataBaseDummy', ['path', 'tags'])
  63. DB = DataBaseDummy('dummy', 'tags')
  64. # with this config, pages are rendered to the location of their title
  65. KINDS = {
  66. 'writing': {
  67. 'name': 'writing', 'name_plural': 'writings',
  68. },
  69. }
  70. class Tag(object):
  71. table = DB.tags
  72. db = DB
  73. def __init__(self, name):
  74. self .name = name
  75. self.permalink = GLOBAL_TEMPLATE_CONTEXT["site_url"]
  76. Tags = Query()
  77. tag = self.table.get(Tags.name == self.name)
  78. if not tag:
  79. self.table.insert({'name': self.name, 'post_ids': []})
  80. def __str__(self):
  81. return self.name
  82. def __repr__(self): # pragma: no coverage
  83. return self.name
  84. @property
  85. def slug(self):
  86. _slug = self.name.lower()
  87. _slug = re.sub(r'[;:,. ]+', '-', _slug.lstrip(',.;:-'))
  88. return _slug.lstrip('-')
  89. @property
  90. def posts(self):
  91. """return a listpost ids tagged with Tag"""
  92. Tags = Query()
  93. tag = self.table.get(Tags.name == self.name)
  94. return tag['post_ids']
  95. @posts.setter
  96. def posts(self, post_ids):
  97. if not isinstance(post_ids, list):
  98. raise ValueError("post_ids must be of type list")
  99. Tags = Query()
  100. tag = self.table.get(Tags.name == self.name)
  101. new = set(post_ids) - set(tag['post_ids'])
  102. tag['post_ids'].extend(list(new))
  103. self.table.update({'post_ids': tag['post_ids']}, eids=[tag.eid])
  104. @property
  105. def entries(self):
  106. """return the actual lists of entries tagged with"""
  107. Posts = Query()
  108. for id in self.posts:
  109. post = self.db.posts.get(eid=id)
  110. if not post: # pragma: no coverage
  111. raise ValueError("No post found for eid %s" % id)
  112. yield Entry(os.path.join(CONFIG['content_root'], post['filename']), id)
  113. def render(self):
  114. """Render html page and atom feed"""
  115. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  116. context['tag'] = self
  117. context['entries'] = _sort_entries(self.entries)
  118. # render html page
  119. render_to = os.path.join(CONFIG['output_to'], 'tags', self.slug)
  120. if not os.path.exists(render_to): # pragma: no coverage
  121. os.makedirs(render_to)
  122. _render(context, 'tag_index.html', os.path.join(render_to, 'index.html'))
  123. # render atom.xml
  124. context['entries'] = context['entries'][:10]
  125. context['last_build'] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
  126. _render(context, 'atom.xml', os.path.join(render_to, 'atom.xml'))
  127. return True
  128. class Entry(object):
  129. """This is the base class for creating an HTML page from a Markdown
  130. based page.
  131. The file has the following structure for a page:
  132. .. code:
  133. ---
  134. title: example page
  135. public: yes
  136. kind: page
  137. template: about.html
  138. ---
  139. # some heading
  140. content paragraph
  141. ## heading 2
  142. some more content
  143. The file has the following structure for a blog entry:
  144. .. code:
  145. ---
  146. title: Blog post 1
  147. author: Famous author
  148. published: 2015-01-11
  149. tags: python, git, bash, linux
  150. public: yes
  151. chronological: yes
  152. kind: writing
  153. summary: This is a summry of post 1. Donec id elit non mi porta
  154. ---
  155. This is the body of post 1. Donec id elit non mi porta gravida
  156. """
  157. db = DB
  158. @classmethod
  159. def entry_from_db(kls, filename, eid=None):
  160. f = os.path.join(filename)
  161. return kls(f, eid)
  162. def __init__(self, path, eid=None):
  163. self._path = path
  164. self.path = path.split(CONFIG['content_root'])[-1].lstrip('/')
  165. self.id = eid # this is set inside prepare()
  166. try:
  167. self.prepare()
  168. except KeyError as E: # pragma: no coverage
  169. import pdb; pdb.set_trace()
  170. def __str__(self):
  171. return self.path
  172. def __repr__(self): # pragma: no coverage
  173. return self.path
  174. @property
  175. def name(self):
  176. return os.path.splitext(os.path.basename(self.path))[0]
  177. @property
  178. def abspath(self):
  179. return self._path
  180. @property
  181. def destination(self):
  182. return os.path.join(CONFIG['output_to'], self.permalink)
  183. @property
  184. def title(self):
  185. return self.header['title']
  186. @property
  187. def publish_date(self):
  188. try:
  189. r = datetime.datetime.strptime(self.header.get('published', ''), "%Y-%m-%d")
  190. except ValueError: # pragma: no coverage
  191. r = datetime.date.today()
  192. return r
  193. @property
  194. def permalink(self):
  195. if self.kind == 'page':
  196. dest = '%s.html' % self._path.replace('.md', "")
  197. else:
  198. dest = "%s/%s/index.html" % (KINDS[self.kind]['name_plural'], self.name)
  199. dest = dest.lstrip('/')
  200. return dest
  201. @property
  202. def tags(self):
  203. """this property is always called after prepare"""
  204. if 'tags' in self.header:
  205. tags = [Tag(t) for t in self.header['tags']]
  206. list(map(lambda t: setattr(t, 'posts', [self.id]), tags))
  207. return tags
  208. else:
  209. return []
  210. def prepare(self):
  211. self.body_html = markdown2.markdown(
  212. codecs.open(self.abspath, 'r').read(),
  213. extras=['fenced-code-blocks', 'hilite', 'tables', 'metadata'])
  214. self.header = self.body_html.metadata
  215. if 'tags' in self.header: # pages can lack tags
  216. self.header['tags'] = self.header['tags'].split(',')
  217. self.date = self.header.get('published', datetime.datetime.now())
  218. if isinstance(self.date, str):
  219. self.date = datetime.datetime.strptime(self.date, "%Y-%m-%d")
  220. for k, v in self.header.items():
  221. try:
  222. setattr(self, k, v)
  223. except AttributeError:
  224. pass
  225. if self.id:
  226. return
  227. if self.header['kind'] == 'writing':
  228. _id = Entry.db.posts.insert({'filename': self.path})
  229. elif self.header['kind'] == 'page':
  230. _id = Entry.db.pages.insert({'filename': self.path})
  231. self.id = _id
  232. def render(self):
  233. if self.header.get('public', '').lower() in ['true', 'yes']:
  234. try:
  235. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  236. context['entry'] = self
  237. _render(context, self.header.get('template', 'entry.html'),
  238. self.header.get('template', self.destination))
  239. return True
  240. except Exception as e: # pragma: no cover
  241. logger.exception("Found some problem with %s", self.path)
  242. sys.exit(1)
  243. def _sort_entries(entries, reversed=True):
  244. """Sort all entries by date and reverse the list"""
  245. return list(sorted(entries, key=operator.attrgetter('date'), reverse=reversed))
  246. def _render(context, template_path, output_path, encoding='utf-8'):
  247. template = jinja_env.get_template(template_path)
  248. rendered = template.render(context)
  249. html = template.render(context)
  250. try:
  251. os.makedirs(os.path.dirname(output_path))
  252. except OSError:
  253. pass
  254. destination = codecs.open(output_path, 'w', encoding)
  255. destination.write(html)
  256. destination.close()
  257. def render_archive(entries):
  258. """Creates the archive page"""
  259. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  260. context['entries'] = entries
  261. _render(context, 'archive_index.html',
  262. os.path.join(CONFIG['output_to'],'archive/index.html')),
  263. def find_new_posts_and_pages(db):
  264. """Walk content dir, put each post and page in the database"""
  265. Q = Query()
  266. for root, dirs, files in os.walk(CONFIG['content_root']):
  267. for filename in files:
  268. if filename.endswith(('md', 'markdown')):
  269. fullpath = os.path.join(root, filename)
  270. _p = fullpath.split(CONFIG['content_root'])[-1].lstrip('/')
  271. if not db.posts.contains(Q.filename == _p) and \
  272. not db.pages.contains(Q.filename == _p):
  273. e = Entry(fullpath)
  274. yield e, e.id
  275. def _get_last_entries(db, qty):
  276. eids = [post.eid for post in db.posts.all()]
  277. eids = sorted(eids, reverse=True)
  278. entries = [Entry(os.path.join(CONFIG['content_root'],
  279. db.posts.get(eid=eid)['filename']), eid) for eid in eids]
  280. return _sort_entries(entries)[:qty]
  281. def update_index(entries):
  282. """find the last 10 entries in the database and create the main
  283. page.
  284. Each entry in has an eid, so we only get the last 10 eids.
  285. This method also updates the ATOM feed.
  286. """
  287. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  288. context['entries'] = entries
  289. context['last_build'] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
  290. list(map(lambda x: _render(
  291. context, x[0], os.path.join(CONFIG['output_to'], x[1])),
  292. (('entry_index.html', 'index.html'), ('atom.xml', 'atom.xml'))))
  293. def build(config):
  294. """Incremental build of the website"""
  295. logger.info("\nRendering website now...\n")
  296. logger.info("entries:")
  297. tags = dict()
  298. entries = list()
  299. root = CONFIG['content_root']
  300. for post, post_id in find_new_posts_and_pages(DB):
  301. # this method will also parse the post's tags and
  302. # update the db collection containing the tags.
  303. if post.render():
  304. if post.header['kind'] in ['writing', 'link']:
  305. for tag in post.tags:
  306. tag.posts = [post_id]
  307. tags[tag.name] = tag
  308. entries.append(post)
  309. logger.info("%s" % post.path)
  310. for name, to in tags.items():
  311. logger.info("updating tag %s" % name)
  312. to.render()
  313. # BUG: Only public entries should be added to the index
  314. # This is expensive, we should insert only the recent entries
  315. # to the index using BeautifulSoup
  316. # update index
  317. logger.info("Updating index")
  318. update_index(_get_last_entries(DB, config['INDEX_SIZE']))
  319. # update archive
  320. logger.info("Updating archive")
  321. # This is expensive, we should insert only the recent entries
  322. # to the archive using BeautifulSoup
  323. entries = [Entry.entry_from_db(
  324. os.path.join(CONFIG['content_root'], e.get('filename')), e.eid) for e in
  325. DB.posts.all()]
  326. render_archive(_sort_entries(entries, reversed=True)[config['ARCHIVE_SIZE']:])
  327. def preview(): # pragma: no coverage
  328. """launch an HTTP to preview the website"""
  329. Handler = http.server.SimpleHTTPRequestHandler
  330. socketserver.TCPServer.allow_reuse_address = True
  331. port = CONFIG['http_port']
  332. httpd = socketserver.TCPServer(("", port), Handler)
  333. os.chdir(CONFIG['output_to'])
  334. try:
  335. logger.info("and ready to test at http://127.0.0.1:%d" % CONFIG['http_port'])
  336. logger.info("Hit Ctrl+C to exit")
  337. httpd.serve_forever()
  338. except KeyboardInterrupt:
  339. httpd.shutdown()
  340. def quick_start(): # pragma: no coverage
  341. path = resource_filename(Requirement.parse("blogit"), 'blogit/blogit-mir')
  342. copy_tree(path, '.')
  343. def publish(GITDIRECTORY=CONFIG['output_to']): # pragma: no coverage
  344. sp.call('git push', cwd=GITDIRECTORY, shell=True)
  345. def new_post(GITDIRECTORY=CONFIG['output_to'],
  346. kind=KINDS['writing']): # pragma: no coverage
  347. """
  348. This function should create a template for a new post with a title
  349. read from the user input.
  350. Most other fields should be defaults.
  351. TODO: update this function
  352. """
  353. title = input("Give the title of the post: ")
  354. while ':' in title:
  355. title = input("Give the title of the post (':' not allowed): ")
  356. author = CONFIG['author']
  357. date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
  358. tags = input("Give the tags, separated by ', ':")
  359. published = 'yes'
  360. chronological = 'yes'
  361. summary = ("summary: Type your summary here.")
  362. # make file name
  363. fname = os.path.join(os.getcwd(), 'content', kind['name_plural'],
  364. datetime.datetime.strftime(datetime.datetime.now(),
  365. '%Y'),
  366. date+'-'+title.replace(' ', '-')+'.markdown')
  367. with open(fname, 'w') as npost:
  368. npost.write('---\n')
  369. npost.write('title: %s\n' % title)
  370. npost.write('author: %s\n' % author)
  371. npost.write('published: %s\n' % date)
  372. npost.write('tags: %s\n' % tags)
  373. npost.write('public: %s\n' % published)
  374. npost.write('chronological: %s\n' % chronological)
  375. npost.write('kind: %s\n' % kind['name'])
  376. npost.write('%s' % summary)
  377. npost.write('---\n')
  378. os.system('%s %s' % (CONFIG['editor'], fname))
  379. def get_parser(formatter_class=argparse.HelpFormatter): # pragma: no coverage
  380. parser = argparse.ArgumentParser(
  381. prog='blogit',
  382. description='blogit - a simple static site generator.',
  383. formatter_class=formatter_class)
  384. parser.add_argument('-b', '--build', action="store_true",
  385. help='convert the markdown files to HTML')
  386. parser.add_argument('-p', '--preview', action="store_true",
  387. help='Launch HTTP server to preview the website')
  388. parser.add_argument('-c', '--clean', action="store_true",
  389. help='clean output files')
  390. parser.add_argument('-n', '--new', action="store_true",
  391. help='create new post')
  392. parser.add_argument('--publish', action="store_true",
  393. help='push built HTML to git upstream')
  394. parser.add_argument('--quick-start', action="store_true")
  395. return parser
  396. def main(): # pragma: no coverage
  397. parser = get_parser()
  398. args = parser.parse_args()
  399. if len(sys.argv) < 2:
  400. parser.print_help()
  401. sys.exit()
  402. if args.build:
  403. build(CONFIG)
  404. if args.preview:
  405. preview()
  406. if args.new:
  407. new_post()
  408. if args.publish:
  409. publish()
  410. if args.quick_start:
  411. quick_start()
  412. if __name__ == '__main__': # pragma: no coverage
  413. main()