blogit2.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. #!/usr/bin/env python
  2. # ============================================================================
  3. # Blogit.py is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License, version 3
  5. # as published by the Free Software Foundation;
  6. #
  7. # Blogit.py is distributed in the hope that it will be useful,
  8. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. # GNU General Public License for more details.
  11. #
  12. # You should have received a copy of the GNU General Public License
  13. # along with Blogit.py; if not, write to the Free Software
  14. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  15. # ============================================================================
  16. # Copyright (C) 2013 Oz Nahum Tiram <nahumoz@gmail.com>
  17. # ============================================================================
  18. # Note about Summary
  19. # has to be 1 line, no '\n' allowed!
  20. """
  21. Summary: |
  22. some summary ...
  23. Your post
  24. """
  25. """
Nothing in the header may contain ":" or "...". In particular, you can't
have a title with ":" in it, because it makes markdown break!
  28. """
  29. """
  30. The content directory can contain only markdown or txt files, no images
  31. allowed!
  32. """
  33. import os
  34. import re
  35. import datetime
  36. import argparse
  37. import sys
  38. import operator
  39. from distutils import dir_util
  40. import shutil
  41. from StringIO import StringIO
  42. import codecs
  43. import subprocess as sp
  44. import SimpleHTTPServer
  45. import BaseHTTPServer
  46. import socket
  47. import SocketServer
  48. import thread
  49. try:
  50. import yaml # in debian python-yaml
  51. from jinja2 import Environment, FileSystemLoader # in debian python-jinja2
  52. except ImportError, e:
  53. print e
  54. print "On Debian based system you can install the dependencies with: "
  55. print "apt-get install python-yaml python-jinja2"
  56. sys.exit(1)
  57. try:
  58. import markdown2
  59. renderer = 'md2'
  60. except ImportError, e:
  61. try:
  62. import markdown
  63. renderer = 'md1'
  64. except ImportError, e:
  65. print e
  66. print "try: sudo pip install markdown2"
  67. sys.exit(1)
  68. import tinydb
  69. from tinydb import Query
  70. sys.path.insert(0, os.getcwdu())
  71. from conf import CONFIG, ARCHIVE_SIZE, GLOBAL_TEMPLATE_CONTEXT, KINDS
  72. jinja_env = Environment(loader=FileSystemLoader(CONFIG['templates']))
  73. class DataBase(object):
  74. def __init__(self, path):
  75. _db = tinydb.TinyDB(path)
  76. self.posts = _db.table('posts')
  77. self.tags = _db.table('tags')
  78. self.pages = _db.table('pages')
  79. self.templates = _db.table('templates')
  80. self._db = _db
  81. DB = DataBase(os.path.join(CONFIG['content_root'], 'blogit.db'))
  82. class Tag(object):
  83. def __init__(self, name):
  84. self.name = name
  85. self.prepare()
  86. self.permalink = GLOBAL_TEMPLATE_CONTEXT["site_url"]
  87. self.table = DB.tags
  88. # todo: fix this
  89. #try:
  90. # os.makedirs(destination)
  91. #except:
  92. # pass
  93. Tags = Query()
  94. tag = self.table.get(Tags.name == self.name)
  95. if not tag:
  96. self.table.insert({'name': self.name, 'post_ids': []})
  97. def prepare(self):
  98. _slug = self.name.lower()
  99. _slug = re.sub(r'[;;,. ]', '-', _slug)
  100. self.slug = _slug
  101. @property
  102. def posts(self):
  103. """
  104. return a list of posts tagged with Tag
  105. """
  106. Tags = Query()
  107. tag = self.table.get(Tags.name == self.name)
  108. return tag['post_ids']
  109. @posts.setter
  110. def posts(self, post_ids):
  111. if not isinstance(post_ids, list):
  112. raise ValueError("post_ids must be of type list")
  113. Tags = Query()
  114. tag = self.table.get(Tags.name == self.name)
  115. if tag:
  116. new = set(post_ids) - set(tag['post_ids'])
  117. tag['post_ids'].extend(list(new))
  118. self.table.update({'post_ids': tag['post_ids']}, eids=[tag.eid])
  119. else:
  120. self.table.insert({'name': self.name, 'post_ids': post_ids})
  121. @property
  122. def entries(self):
  123. _entries = []
  124. Posts = Query()
  125. for id in self.posts:
  126. post = DB.posts.get(eid=id)
  127. if not post:
  128. raise ValueError("no post found for eid %s" % id)
  129. entry = Entry(os.path.join(CONFIG['content_root'],
  130. post['filename']))
  131. _entries.append(entry)
  132. return _entries
  133. def render(self):
  134. """Render html page and atom feed"""
  135. self.destination = "%s/tags/%s" % (CONFIG['output_to'],
  136. self.slug)
  137. template = jinja_env.get_template('tag_index.html')
  138. try:
  139. os.makedirs(self.destination)
  140. except OSError:
  141. pass
  142. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  143. context['tag'] = self
  144. context['entries'] = _sort_entries(self.entries)
  145. sorted_entries = _sort_entries(self.entries)
  146. encoding = CONFIG['content_encoding']
  147. render_to = "%s/tags/%s" % (CONFIG['output_to'], self.slug)
  148. jobs = [{'tname': 'tag_index.html',
  149. 'output': codecs.open("%s/index.html" % render_to, 'w', encoding),
  150. 'entries': sorted_entries},
  151. {'tname': 'atom.xml',
  152. 'output': codecs.open("%s/atom.xml" % render_to, 'w', encoding),
  153. 'entries': sorted_entries[:10]}
  154. ]
  155. for j in jobs:
  156. template = jinja_env.get_template(j['tname'])
  157. context['entries'] = j['entries']
  158. html = template.render(context)
  159. j['output'].write(html)
  160. j['output'].close()
  161. return True
  162. class Entry(object):
  163. def __init__(self, path):
  164. super(Entry, self).__init__()
  165. path = path.split('content/')[-1]
  166. self.path = path
  167. self.entry_template = jinja_env.get_template("entry.html")
  168. self.prepare()
  169. def __str__(self):
  170. return self.path
  171. def __repr__(self):
  172. return self.path
  173. @property
  174. def name(self):
  175. return os.path.splitext(os.path.basename(self.path))[0]
  176. @property
  177. def abspath(self):
  178. return os.path.abspath(os.path.join(CONFIG['content_root'], self.path))
  179. @property
  180. def destination(self):
  181. dest = "%s/%s/index.html" % (KINDS[
  182. self.kind]['name_plural'], self.name)
  183. print dest
  184. return os.path.join(CONFIG['output_to'], dest)
  185. @property
  186. def title(self):
  187. return self.header['title']
  188. @property
  189. def summary_html(self):
  190. return "%s" % markdown2.markdown(self.header['summary'].strip())
  191. @property
  192. def credits_html(self):
  193. return "%s" % markdown2.markdown(self.header['credits'].strip())
  194. @property
  195. def summary_atom(self):
  196. summarya = markdown2.markdown(self.header['summary'].strip())
  197. summarya = re.sub("<p>|</p>", "", summarya)
  198. more = '<a href="%s"> continue reading...</a>' % (self.permalink)
  199. return summarya+more
  200. @property
  201. def published_html(self):
  202. if self.kind in ['link', 'note', 'photo']:
  203. return self.header['published'].strftime("%B %d, %Y %I:%M %p")
  204. return self.header['published'].strftime("%B %d, %Y")
  205. @property
  206. def published_atom(self):
  207. return self.published.strftime("%Y-%m-%dT%H:%M:%SZ")
  208. @property
  209. def atom_id(self):
  210. return "tag:%s,%s:%s" % \
  211. (
  212. self.published.strftime("%Y-%m-%d"),
  213. self.permalink,
  214. GLOBAL_TEMPLATE_CONTEXT["site_url"]
  215. )
  216. @property
  217. def body_html(self):
  218. if renderer == 'md2':
  219. return markdown2.markdown(self.body, extras=['fenced-code-blocks',
  220. 'hilite',
  221. "tables"])
  222. if renderer == 'md1':
  223. return markdown.markdown(self.body,
  224. extensions=['fenced_code',
  225. 'codehilite(linenums=False)',
  226. 'tables'])
  227. @property
  228. def permalink(self):
  229. return "/%s/%s" % (KINDS[self.kind]['name_plural'], self.name)
  230. @property
  231. def tags(self):
  232. return [Tag(t) for t in self.header['tags']]
  233. def _read_header(self, file):
  234. header = ['---']
  235. while True:
  236. line = file.readline()
  237. line = line.rstrip()
  238. if not line:
  239. break
  240. header.append(line)
  241. header = yaml.load(StringIO('\n'.join(header)))
  242. # todo: dispatch header to attribute
  243. # todo: parse date from string to a datetime object
  244. return header
  245. def prepare(self):
  246. file = codecs.open(self.abspath, 'r')
  247. self.header = self._read_header(file)
  248. self.date = self.header['published']
  249. for k, v in self.header.items():
  250. try:
  251. setattr(self, k, v)
  252. except:
  253. pass
  254. body = file.readlines()
  255. self.body = ''.join(body)
  256. file.close()
  257. if self.kind == 'link':
  258. from urlparse import urlparse
  259. self.domain_name = urlparse(self.url).netloc
  260. elif self.kind == 'photo':
  261. pass
  262. elif self.kind == 'note':
  263. pass
  264. elif self.kind == 'writing':
  265. pass
  266. def render(self):
  267. if not self.header['public']:
  268. return False
  269. try:
  270. os.makedirs(os.path.dirname(self.destination))
  271. except:
  272. pass
  273. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  274. context['entry'] = self
  275. try:
  276. html = self.entry_template.render(context)
  277. except Exception as e:
  278. print context
  279. print self.path
  280. print e
  281. sys.exit()
  282. destination = codecs.open(
  283. self.destination, 'w', CONFIG['content_encoding'])
  284. destination.write(html)
  285. destination.close()
  286. # before returning write log to csv
  287. # file name, date first seen, date rendered
  288. # self.path , date-first-seen, if rendered datetime.now
  289. return True
  290. class Link(Entry):
  291. def __init__(self, path):
  292. super(Link, self).__init__(path)
  293. @property
  294. def permalink(self):
  295. print "self.url", self.url
  296. return self.url
  297. def _sort_entries(entries):
  298. """Sort all entries by date and reverse the list"""
  299. return list(reversed(sorted(entries, key=operator.attrgetter('date'))))
  300. def render_archive(entries, render_to=None):
  301. """
  302. this function creates the archive page
  303. """
  304. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  305. context['entries'] = entries[ARCHIVE_SIZE:]
  306. template = jinja_env.get_template('archive_index.html')
  307. html = template.render(context)
  308. if not render_to:
  309. render_to = "%s/archive/index.html" % CONFIG['output_to']
  310. dir_util.mkpath("%s/archive" % CONFIG['output_to'])
  311. destination = codecs.open("%s/archive/index.html" % CONFIG[
  312. 'output_to'], 'w', CONFIG['content_encoding'])
  313. destination.write(html)
  314. destination.close()
  315. def find_new_posts(posts_table):
  316. """
  317. Walk content dir, put each post in the database
  318. """
  319. Posts = Query()
  320. for root, dirs, files in os.walk(CONFIG['content_root']):
  321. for filename in files:
  322. if filename.endswith(('md', 'markdown')):
  323. if not posts_table.contains(Posts.filename == filename):
  324. post_id = posts_table.insert({'filename': filename})
  325. yield post_id, filename
  326. def _get_last_entries():
  327. eids = [post.eid for post in DB.posts.all()]
  328. eids = sorted(eids, reverse=True)[-10:]
  329. entries = [Entry(DB.posts.get(eid=eid)['filename']) for eid in eids]
  330. return entries
  331. def update_index():
  332. """find the last 10 entries in the database and create the main
  333. page.
  334. Each entry in has an eid, so we only get the last 10 eids.
  335. This method also update the ATOM feed.
  336. """
  337. entries = _get_last_entries()
  338. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  339. context['entries'] = entries
  340. for name, out in {'entry_index.html': 'index.html',
  341. 'atom.xml': 'atom.xml'}.items():
  342. template = jinja_env.get_template(name)
  343. html = template.render(context)
  344. destination = codecs.open("%s/%s" % (CONFIG['output_to'], out),
  345. 'w', CONFIG['content_encoding'])
  346. destination.write(html)
  347. destination.close()
  348. def new_build():
  349. """
  350. a. For each new post:
  351. 1. render html
  352. 2. find post tags
  353. 3. update atom feeds for old tags
  354. 4. create new atom feeds for new tags
  355. b. update index page
  356. c. update archive page
  357. """
  358. print
  359. print "Rendering website now..."
  360. print
  361. print " entries:"
  362. entries = list()
  363. tags = dict()
  364. root = CONFIG['content_root']
  365. for post_id, post in find_new_posts(DB.posts):
  366. try:
  367. entry = Entry(os.path.join(root, post))
  368. if entry.render():
  369. entries.append(entry)
  370. for tag in entry.tags:
  371. tag.posts = [post_id]
  372. tags[tag.name] = tag
  373. print " %s" % entry.path
  374. except Exception as e:
  375. print "Found some problem in: ", post
  376. print e
  377. print "Please correct this problem ..."
  378. sys.exit(1)
  379. for name, to in tags.iteritems():
  380. print "updating tag %s" % name
  381. to.render()
  382. # update index
  383. print "updating index"
  384. update_index()
  385. # update archive
  386. print "updating archive"
  387. # TODO
  388. class StoppableHTTPServer(BaseHTTPServer.HTTPServer):
  389. def server_bind(self):
  390. BaseHTTPServer.HTTPServer.server_bind(self)
  391. self.socket.settimeout(1)
  392. self.run = True
  393. def get_request(self):
  394. while self.run:
  395. try:
  396. sock, addr = self.socket.accept()
  397. sock.settimeout(None)
  398. return (sock, addr)
  399. except socket.timeout:
  400. pass
  401. def stop(self):
  402. self.run = False
  403. def serve(self):
  404. while self.run:
  405. self.handle_request()
  406. def preview():
  407. """
  408. launch an HTTP to preview the website
  409. """
  410. Handler = SimpleHTTPServer.SimpleHTTPRequestHandler
  411. SocketServer.TCPServer.allow_reuse_address = True
  412. port = CONFIG['http_port']
  413. httpd = SocketServer.TCPServer(("", port), Handler)
  414. os.chdir(CONFIG['output_to'])
  415. print "and ready to test at http://127.0.0.1:%d" % CONFIG['http_port']
  416. print "Hit Ctrl+C to exit"
  417. try:
  418. httpd.serve_forever()
  419. except KeyboardInterrupt:
  420. httpd.shutdown()
  421. def publish(GITDIRECTORY=CONFIG['output_to']):
  422. sp.call('git push', cwd=GITDIRECTORY, shell=True)
  423. def new_post(GITDIRECTORY=CONFIG['output_to'],
  424. kind=KINDS['writing']):
  425. """
  426. This function should create a template for a new post with a title
  427. read from the user input.
  428. Most other fields should be defaults.
  429. """
  430. title = raw_input("Give the title of the post: ")
  431. while ':' in title:
  432. title = raw_input("Give the title of the post (':' not allowed): ")
  433. author = CONFIG['author']
  434. date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
  435. tags = '[' + raw_input("Give the tags, separated by ', ':") + ']'
  436. published = 'yes'
  437. chronological = 'yes'
  438. summary = ("summary: |\n Type your summary here.\n Do not change the "
  439. "indentation"
  440. "to the left\n ...\n\nStart writing your post here!")
  441. # make file name
  442. fname = os.path.join(os.getcwd(), 'content', kind['name_plural'],
  443. datetime.datetime.strftime(datetime.datetime.now(),
  444. '%Y'),
  445. date+'-'+title.replace(' ', '-')+'.markdown')
  446. with open(fname, 'w') as npost:
  447. npost.write('title: %s\n' % title)
  448. npost.write('author: %s\n' % author)
  449. npost.write('published: %s\n' % date)
  450. npost.write('tags: %s\n' % tags)
  451. npost.write('public: %s\n' % published)
  452. npost.write('chronological: %s\n' % chronological)
  453. npost.write('kind: %s\n' % kind['name'])
  454. npost.write('%s' % summary)
  455. print '%s %s' % (CONFIG['editor'], repr(fname))
  456. os.system('%s %s' % (CONFIG['editor'], fname))
  457. def clean(GITDIRECTORY=CONFIG['output_to']):
  458. directoriestoclean = ["writings", "notes", "links", "tags", "archive"]
  459. os.chdir(GITDIRECTORY)
  460. for directory in directoriestoclean:
  461. shutil.rmtree(directory)
  462. def dist(SOURCEDIR=os.getcwd()+"/content/",
  463. DESTDIR=CONFIG['raw_content']):
  464. """
  465. sync raw files from SOURCE to DEST
  466. """
  467. sp.call(["rsync", "-avP", SOURCEDIR, DESTDIR], shell=False,
  468. cwd=os.getcwd())
  469. def main():
  470. parser = argparse.ArgumentParser(
  471. description='blogit - a tool to blog on github.')
  472. parser.add_argument('-b', '--build', action="store_true",
  473. help='convert the markdown files to HTML')
  474. parser.add_argument('-p', '--preview', action="store_true",
  475. help='Launch HTTP server to preview the website')
  476. parser.add_argument('-c', '--clean', action="store_true",
  477. help='clean output files')
  478. parser.add_argument('-n', '--new', action="store_true",
  479. help='create new post')
  480. parser.add_argument('-d', '--dist', action="store_true",
  481. help='sync raw files from SOURCE to DEST')
  482. parser.add_argument('--publish', action="store_true",
  483. help='push built HTML to git upstream')
  484. args = parser.parse_args()
  485. if len(sys.argv) < 2:
  486. parser.print_help()
  487. sys.exit()
  488. if args.clean:
  489. clean()
  490. if args.build:
  491. new_build()
  492. if args.dist:
  493. dist()
  494. if args.preview:
  495. preview()
  496. if args.new:
  497. new_post()
  498. if args.publish:
  499. publish()
  500. if __name__ == '__main__':
  501. main()