blogit.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. # ============================================================================
  2. # Blogit.py is free software; you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License, version 3
  4. # as published by the Free Software Foundation;
  5. #
  6. # Blogit.py is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with Blogit.py; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  14. # ============================================================================
  15. # Copyright (C) 2013-2016 Oz Nahum Tiram <nahumoz@gmail.com>
  16. # ============================================================================
  17. from __future__ import print_function
  18. import os
  19. import re
  20. import datetime
  21. import argparse
  22. import sys
  23. import operator
  24. import shutil
  25. from StringIO import StringIO
  26. import codecs
  27. import subprocess as sp
  28. import SimpleHTTPServer
  29. import BaseHTTPServer
  30. import socket
  31. import SocketServer
  32. from jinja2 import Environment, FileSystemLoader
  33. import markdown2
  34. import tinydb
  35. from tinydb import Query, where
  36. sys.path.insert(0, os.getcwd())
  37. from conf import CONFIG, ARCHIVE_SIZE, GLOBAL_TEMPLATE_CONTEXT, KINDS
  # Shared Jinja2 environment used by every render call in this module.
  # Templates are looked up in the directory named by CONFIG['templates'].
  jinja_env = Environment(
      loader=FileSystemLoader(CONFIG['templates']),
      lstrip_blocks=True,
      trim_blocks=True,
  )
  class DataBase(object):  # pragma: no cover
      """A thin wrapper around a TinyDB instance.

      The four tables used by this module are exposed as attributes:
      ``posts``, ``tags``, ``pages`` and ``templates``.
      """

      def __init__(self, path):
          """Open the TinyDB database stored at ``path`` and bind its tables."""
          self._db = tinydb.TinyDB(path)
          self.posts = self._db.table('posts')
          self.tags = self._db.table('tags')
          self.pages = self._db.table('pages')
          self.templates = self._db.table('templates')
  # Module-wide database handle; the file lives at <content_root>/blogit.db.
  DB = DataBase(os.path.join(CONFIG['content_root'], 'blogit.db'))
  class Tag(object):
      """A tag attached to posts, backed by one row of the ``tags`` table.

      A row has the shape ``{'name': <tag name>, 'post_ids': [<eid>, ...]}``.
      """

      table = DB.tags
      db = DB

      def __init__(self, name):
          """Bind to the tag called ``name``; insert its row if it is missing."""
          self.name = name
          self.permalink = GLOBAL_TEMPLATE_CONTEXT["site_url"]
          if not self._row():
              self.table.insert({'name': self.name, 'post_ids': []})

      def _row(self):
          """Return the database row of this tag (falsy when absent)."""
          return self.table.get(Query().name == self.name)

      def __str__(self):
          return self.name

      def __repr__(self):
          return self.name

      @property
      def slug(self):
          """Lower-cased name with runs of ``;:,.`` and spaces turned into '-'."""
          return re.sub(r'[;:,. ]+', '-', self.name.lower())

      @property
      def posts(self):
          """Return the list of post ids tagged with this tag."""
          return self._row()['post_ids']

      @posts.setter
      def posts(self, post_ids):
          """Add ``post_ids`` to the stored ids; ids already stored are kept.

          Assignment therefore *merges* rather than replaces.

          Raises:
              ValueError: if ``post_ids`` is not a list.
          """
          if not isinstance(post_ids, list):
              raise ValueError("post_ids must be of type list")

          row = self._row()
          merged = list(row['post_ids'])
          seen = set(merged)
          # Append in the order given so the stored list is deterministic
          # (a plain set difference would yield an arbitrary order).
          for post_id in post_ids:
              if post_id not in seen:
                  seen.add(post_id)
                  merged.append(post_id)
          self.table.update({'post_ids': merged}, eids=[row.eid])

      @property
      def entries(self):
          """Yield an Entry for every post id stored for this tag.

          Raises:
              ValueError: if a stored id has no row in the posts table.
          """
          for post_id in self.posts:
              post = self.db.posts.get(eid=post_id)
              if not post:
                  raise ValueError("no post found for eid %s" % post_id)
              yield Entry(os.path.join(CONFIG['content_root'],
                                       post['filename']))

      def render(self):
          """Render the tag's HTML index page and its atom feed.

          Both files are written below ``<output_to>/tags/<slug>/``.
          Always returns True.
          """
          context = GLOBAL_TEMPLATE_CONTEXT.copy()
          context['tag'] = self
          context['entries'] = _sort_entries(self.entries)

          # render html page
          render_to = os.path.join(CONFIG['output_to'], 'tags', self.slug)
          if not os.path.exists(render_to):
              os.makedirs(render_to)
          _render(context, 'tag_index.html',
                  os.path.join(render_to, 'index.html'))

          # render atom.xml; the feed only carries the first ten entries
          context['entries'] = context['entries'][:10]
          _render(context, 'atom.xml', os.path.join(render_to, 'atom.xml'))
          return True
  class Entry(object):
      """Base class for creating an HTML page from a Markdown based page.

      The file has the following structure for a page:

      .. code:

          ---
          title: example page
          public: yes
          kind: page
          template: about.html
          ---

          # some heading

          content paragraph

          ## heading 2

          some more content

      The file has the following structure for a blog entry:

      .. code:

          ---
          title: Blog post 1
          author: Famous author
          published: 2015-01-11
          tags: [python, git, bash, linux]
          public: yes
          chronological: yes
          kind: writing
          summary: This is a summary of post 1. Donec id elit non mi porta
          ---

          This is the body of post 1. Donec id elit non mi porta gravida
      """

      db = DB

      @classmethod
      def entry_from_db(kls, filename):
          """Alternate constructor: build an entry from a stored filename."""
          return kls(filename)

      def __init__(self, path):
          """Parse the Markdown file at ``path`` and register it in the db."""
          self._path = path
          # path relative to the content root; this is the key stored in the db
          self.path = path.split(CONFIG['content_root'])[-1].lstrip('/')
          self.id = None  # this is set inside prepare()
          self.prepare()

      def __str__(self):
          return self.path

      def __repr__(self):
          return self.path

      @property
      def name(self):
          """File name of the entry without directory and extension."""
          return os.path.splitext(os.path.basename(self.path))[0]

      @property
      def abspath(self):
          """The path exactly as it was handed to the constructor."""
          return self._path

      @property
      def destination(self):
          """Output file: the permalink below CONFIG['output_to']."""
          return os.path.join(CONFIG['output_to'], self.permalink)

      @property
      def title(self):
          return self.header['title']

      @property
      def summary_html(self):
          """The ``summary`` header rendered from Markdown to HTML."""
          return "%s" % markdown2.markdown(
              self.header.get('summary', "").strip())

      @property
      def summary_atom(self):
          """Summary for the feed: paragraph tags removed, link appended."""
          summarya = markdown2.markdown(self.header.get('summary', "").strip())
          summarya = re.sub("<p>|</p>", "", summarya)
          more = '<a href="%s"> continue reading...</a>' % (self.permalink)
          return summarya + more

      @property
      def publish_date(self):
          """The ``published`` header, or today's date as YYYY-MM-DD."""
          return self.header.get('published',
                                 datetime.date.today().strftime("%Y-%m-%d"))

      @property
      def permalink(self):
          """Site-relative output path of the entry.

          Pages become ``<title>.html`` ('/' in the title replaced by '-');
          everything else becomes ``<kind plural>/<name>/index.html``.
          """
          if self.kind == 'page':
              dest = '%s.html' % self.title.replace('/', "-")
          else:
              dest = "%s/%s/index.html" % (KINDS[self.kind]['name_plural'],
                                           self.name)
              dest = dest.lstrip('/')
          return dest

      @property
      def tags(self):
          """Tag objects for this entry; must only be used after prepare().

          As a side effect every tag gets this entry's id added to its posts.
          """
          if 'tags' not in self.header:
              return []
          tags = [Tag(t) for t in self.header['tags']]
          # explicit loop: map() used for side effects does nothing when lazy
          for tag in tags:
              tag.posts = [self.id]
          return tags

      def prepare(self):
          """Parse the source file, expose its headers and register the entry.

          Sets ``body_html``, ``header``, ``date`` and ``id``. Every header key
          is also copied onto the instance, unless a read-only property of the
          same name exists.
          """
          # close the file deterministically and decode it explicitly
          with codecs.open(self.abspath, 'r', 'utf-8') as source:
              raw = source.read()
          self.body_html = markdown2.markdown(
              raw,
              extras=['fenced-code-blocks', 'hilite', 'tables', 'metadata'])

          self.header = self.body_html.metadata

          if 'tags' in self.header:  # pages can lack tags
              raw_tags = self.header['tags']
              if hasattr(raw_tags, 'split'):
                  raw_tags = raw_tags.split(',')
              # strip padding so "a, b" and "a,b" name the same tags
              self.header['tags'] = [t.strip() for t in raw_tags if t.strip()]

          published = self.header.get('published')
          if published is None:
              # midnight today as a datetime, so entries without a date can
              # be compared with parsed ones when sorting
              self.date = datetime.datetime.combine(datetime.date.today(),
                                                    datetime.time())
          elif isinstance(published, datetime.date):
              self.date = published
          else:
              self.date = datetime.datetime.strptime(published, "%Y-%m-%d")

          for k, v in self.header.items():
              try:
                  setattr(self, k, v)
              except AttributeError:
                  # header names a property without setter (e.g. title, tags)
                  pass

          # Pages live in their own table. Every other kind is stored with
          # the posts, which is where Tag.entries looks tagged entries up.
          if self.header['kind'] == 'page':
              table = Entry.db.pages
          else:
              table = Entry.db.posts
          rows = table.search(where('filename') == self.path)
          if rows:
              self.id = rows[0].eid
          else:
              self.id = table.insert({'filename': self.path})

      def render(self):
          """Write the entry's HTML file.

          Returns False without rendering when the ``public`` header is falsy,
          True otherwise. Any rendering error is printed and ends the process
          with exit status 1.
          """
          # NOTE(review): if the metadata parser hands over plain strings,
          # "public: no" is truthy here and the entry still renders - confirm.
          if not self.header['public']:
              return False

          # built outside the try block so the handler can always print it
          context = GLOBAL_TEMPLATE_CONTEXT.copy()
          context['entry'] = self
          try:
              _render(context, self.header.get('template', 'entry.html'),
                      self.destination)
          except Exception as e:  # pragma: no cover
              print(context)
              print(self.path)
              print(e)
              sys.exit(1)
          return True
  239. def _sort_entries(entries):
  240. """Sort all entries by date and reverse the list"""
  241. return list(reversed(sorted(entries, key=operator.attrgetter('date'))))
  def _render(context, template_path, output_path, encoding='utf-8'):
      """Render a template with ``context`` and write it to ``output_path``.

      Args:
          context: mapping handed to the template.
          template_path: template name, resolved by ``jinja_env``.
          output_path: file to write; missing parent directories are created.
          encoding: encoding of the written file.
      """
      template = jinja_env.get_template(template_path)
      html = template.render(context)  # render once, not twice

      target_dir = os.path.dirname(output_path)
      if target_dir and not os.path.isdir(target_dir):
          try:
              os.makedirs(target_dir)
          except OSError:
              # tolerate a directory that appeared in the meantime, but do not
              # hide real failures such as missing permissions
              if not os.path.isdir(target_dir):
                  raise

      with codecs.open(output_path, 'w', encoding) as destination:
          destination.write(html)
  def render_archive(entries):
      """Create the archive page at ``<output_to>/archive/index.html``.

      ``entries`` must support slicing; the caller passes a sorted list.
      """
      context = GLOBAL_TEMPLATE_CONTEXT.copy()
      # NOTE(review): the upper bound 10 is hard-coded, so this slice is empty
      # whenever ARCHIVE_SIZE >= 10 - confirm whether entries[ARCHIVE_SIZE:]
      # was intended.
      context['entries'] = entries[ARCHIVE_SIZE:10]
      _render(context, 'archive_index.html',
              os.path.join(CONFIG['output_to'], 'archive/index.html'))
  def find_new_posts_and_pages(db):
      """Walk the content dir and yield every post or page not yet in ``db``.

      Creating the Entry stores it in the database as a side effect.

      Yields:
          ``(entry, entry.id)`` tuples, in that order.
      """
      Q = Query()
      for root, dirs, files in os.walk(CONFIG['content_root']):
          for filename in files:
              if not filename.endswith(('md', 'markdown')):
                  continue
              fullpath = os.path.join(root, filename)
              # The database stores paths relative to the content root (see
              # Entry.path), so the lookup must use the same key; comparing
              # against the full path would never match.
              relpath = fullpath.split(CONFIG['content_root'])[-1].lstrip('/')
              if (db.posts.contains(Q.filename == relpath) or
                      db.pages.contains(Q.filename == relpath)):
                  continue
              e = Entry(fullpath)
              yield e, e.id
  def _get_last_entries(db):
      """Return Entry objects for the ten highest post eids, highest first."""
      latest_eids = sorted((post.eid for post in db.posts.all()),
                           reverse=True)[:10]
      return [
          Entry(os.path.join(CONFIG['content_root'],
                             db.posts.get(eid=eid)['filename']))
          for eid in latest_eids
      ]
  def update_index(entries):
      """Create the main page and the ATOM feed from ``entries``.

      The caller is expected to pass the last 10 entries of the database.
      Writes ``index.html`` (template ``entry_index.html``) and ``atom.xml``
      (template ``atom.xml``) into CONFIG['output_to'].
      """
      context = GLOBAL_TEMPLATE_CONTEXT.copy()
      context['entries'] = entries

      # explicit loop: map() used for side effects does nothing when lazy
      for template, output in (('entry_index.html', 'index.html'),
                               ('atom.xml', 'atom.xml')):
          _render(context, template, os.path.join(CONFIG['output_to'], output))
  def build():
      """Incremental build of the website.

      Renders every new post and page, then the pages of all tags touched by
      them, and finally the index, the ATOM feed and the archive.
      """
      print("\nRendering website now...\n")
      print("entries:")
      tags = dict()

      # find_new_posts_and_pages yields (entry, eid) - unpack in that order
      for post, post_id in find_new_posts_and_pages(DB):
          # rendering also parses the post's tags and updates the db
          # collection containing the tags.
          if post.render():
              if post.header['kind'] in ['writing', 'link']:
                  for tag in post.tags:
                      tag.posts = [post_id]
                      tags[tag.name] = tag
              print("%s" % post.path)

      for name, to in tags.items():
          print("updating tag %s" % name)
          to.render()

      # update index
      print("updating index")
      update_index(_get_last_entries(DB))

      # update archive
      print("updating archive")
      # stored filenames are relative to the content root
      render_archive(_sort_entries(
          [Entry(os.path.join(CONFIG['content_root'], p['filename']))
           for p in DB.posts.all()]))
  class StoppableHTTPServer(BaseHTTPServer.HTTPServer):  # pragma: no coverage
      """HTTP server whose serve() loop ends after stop() has been called.

      The listening socket gets a one second timeout, so a pending accept
      re-checks the ``run`` flag about once per second.
      """

      def server_bind(self):
          """Bind as usual, then set the accept timeout and the run flag."""
          BaseHTTPServer.HTTPServer.server_bind(self)
          self.socket.settimeout(1)
          self.run = True

      def get_request(self):
          """Wait for a connection and return ``(socket, address)``.

          Raises:
              socket.error: once the server has been stopped. The base class
                  treats this as "no request" instead of failing while
                  unpacking a None return value.
          """
          while self.run:
              try:
                  sock, addr = self.socket.accept()
              except socket.timeout:
                  continue
              # the accepted connection itself must block normally
              sock.settimeout(None)
              return (sock, addr)
          raise socket.error("server stopped")

      def stop(self):
          """Ask get_request() and serve() to finish."""
          self.run = False

      def serve(self):
          """Handle requests one at a time until stop() is called."""
          while self.run:
              self.handle_request()
  def preview():  # pragma: no coverage
      """Launch an HTTP server to preview the website.

      Serves CONFIG['output_to'] on CONFIG['http_port'] until Ctrl+C is hit.
      Changes the process working directory to the output directory.
      """
      Handler = SimpleHTTPServer.SimpleHTTPRequestHandler
      SocketServer.TCPServer.allow_reuse_address = True
      port = CONFIG['http_port']

      httpd = SocketServer.TCPServer(("", port), Handler)
      try:
          # the request handler serves files from the working directory
          os.chdir(CONFIG['output_to'])
          print("and ready to test at http://127.0.0.1:%d" % port)
          print("Hit Ctrl+C to exit")
          httpd.serve_forever()
      except KeyboardInterrupt:
          # serve_forever() has already returned; nothing left to shut down
          pass
      finally:
          # release the listening socket
          httpd.server_close()
  def publish(GITDIRECTORY=CONFIG['output_to']):  # pragma: no coverage
      """Run ``git push`` inside ``GITDIRECTORY``."""
      # argument list instead of a shell string: no shell is involved
      sp.call(['git', 'push'], cwd=GITDIRECTORY)
  def new_post(GITDIRECTORY=CONFIG['output_to'],
               kind=KINDS['writing']):  # pragma: no coverage
      """Create a skeleton file for a new post and open it in the editor.

      Title and tags are read from the user; all other header fields get
      defaults. The file is written to
      ``<cwd>/content/<kind plural>/<year>/<date>-<title>.markdown``.

      Args:
          GITDIRECTORY: unused; kept for backward compatibility.
          kind: one entry of KINDS; its ``name`` and ``name_plural`` are used.

      TODO: update this function
      """
      import shlex

      try:
          ask = raw_input  # Python 2
      except NameError:
          ask = input

      title = ask("Give the title of the post: ")
      while ':' in title:
          title = ask("Give the title of the post (':' not allowed): ")

      author = CONFIG['author']
      # sample the clock once so the date and the year directory always agree
      now = datetime.datetime.now()
      date = now.strftime('%Y-%m-%d')
      tags = ask("Give the tags, separated by ', ':")
      published = 'yes'
      chronological = 'yes'
      summary = "summary: Type your summary here."

      # make file name
      fname = os.path.join(os.getcwd(), 'content', kind['name_plural'],
                           now.strftime('%Y'),
                           date + '-' + title.replace(' ', '-') + '.markdown')
      target_dir = os.path.dirname(fname)
      if not os.path.isdir(target_dir):
          os.makedirs(target_dir)

      with open(fname, 'w') as npost:
          npost.write('---\n')
          npost.write('title: %s\n' % title)
          npost.write('author: %s\n' % author)
          npost.write('published: %s\n' % date)
          npost.write('tags: %s\n' % tags)
          npost.write('public: %s\n' % published)
          npost.write('chronological: %s\n' % chronological)
          npost.write('kind: %s\n' % kind['name'])
          # the newline keeps the closing '---' on a line of its own
          npost.write('%s\n' % summary)
          npost.write('---\n')

      print('%s %s' % (CONFIG['editor'], repr(fname)))
      # No shell: the file name contains user-typed text. The editor setting
      # may carry arguments of its own, hence shlex.split().
      sp.call(shlex.split(CONFIG['editor']) + [fname])
  def clean(GITDIRECTORY=CONFIG['output_to']):  # pragma: no coverage
      """Delete the generated directories below ``GITDIRECTORY``.

      Directories that do not exist are skipped. The working directory of
      the process is left untouched, so later steps of the same run (for
      example a build) still resolve relative paths correctly.
      """
      directoriestoclean = ["writings", "notes", "links", "tags", "archive"]
      for directory in directoriestoclean:
          target = os.path.join(GITDIRECTORY, directory)
          if os.path.isdir(target):
              shutil.rmtree(target)
  def dist(SOURCEDIR=os.getcwd()+"/content/",
           DESTDIR=CONFIG['raw_content']):  # pragma: no coverage
      """Sync raw files from ``SOURCEDIR`` to ``DESTDIR`` with ``rsync -avP``.

      Both defaults are computed once, when the module is imported.
      """
      command = ["rsync", "-avP", SOURCEDIR, DESTDIR]
      sp.call(command, shell=False, cwd=os.getcwd())
  def main():  # pragma: no coverage
      """Command line entry point.

      Several flags may be combined. The selected actions always run in this
      order: clean, build, dist, preview, new, publish. Without any argument
      the help text is printed and the program exits.
      """
      parser = argparse.ArgumentParser(
          description='blogit - a tool to blog on github.')
      parser.add_argument('-b', '--build', action="store_true",
                          help='convert the markdown files to HTML')
      parser.add_argument('-p', '--preview', action="store_true",
                          help='Launch HTTP server to preview the website')
      parser.add_argument('-c', '--clean', action="store_true",
                          help='clean output files')
      parser.add_argument('-n', '--new', action="store_true",
                          help='create new post')
      parser.add_argument('-d', '--dist', action="store_true",
                          help='sync raw files from SOURCE to DEST')
      parser.add_argument('--publish', action="store_true",
                          help='push built HTML to git upstream')

      args = parser.parse_args()

      if len(sys.argv) < 2:
          parser.print_help()
          sys.exit()

      # (flag, action) pairs in execution order
      actions = (
          (args.clean, clean),
          (args.build, build),
          (args.dist, dist),
          (args.preview, preview),
          (args.new, new_post),
          (args.publish, publish),
      )
      for selected, action in actions:
          if selected:
              action()
  # Run the command line interface only when executed as a script.
  if __name__ == '__main__':  # pragma: no coverage
      main()