#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Liste postes
# Copyright (C) 2010 Jean-Marie Favreau <jean-marie.favreau@ens-cachan.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License 3
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


"""Usage: liste-postes.py [options]

Options:
    -s, --section=NB       Numéro de la section (par défaut, 27)
    -t, --type=POSTE       Type de poste. Exemple: "MCF" (défaut), "PR"
    -r, --repertoire=REP   Répertoire où seront téléchargés les pdf de
                           description des postes (par défaut, postes-publies)
    -h, --help             Affiche cette aide
"""

import urllib2
import sys
import re
import os
import shutil
import getopt

# Prefer GNU-style parsing (options and arguments may be intermixed) when the
# getopt module provides it; otherwise fall back to the plain parser.
my_getopt = getattr(getopt, "gnu_getopt", getopt.getopt)

from time import time

from pdftools.pdffile import PDFDocument, PopGS, PushGS
from pdftools.pdfpath import Path
from pdftools.pdftext import Text


def usage(code=0):
  """Print the module help text and terminate the program.

  code -- exit status handed to sys.exit(). It defaults to 0, so existing
          calls without an argument keep their behaviour; error paths can
          pass a non-zero value.

  This function never returns: it always raises SystemExit.
  """
  # Parenthesised single argument: same output as the print statement.
  print(__doc__)
  sys.exit(code)


# URL prefixes of the directories where the position sheets are stored;
# readPostes() tries them in this order when downloading a sheet.
liste_urls_dir = [
  "https://www.galaxie.enseignementsup-recherche.gouv.fr/ensup/ListesPostesPublies/ANTEE/2010_1/",
  "https://www.galaxie.enseignementsup-recherche.gouv.fr/ensup/ListesPostesPublies/FIDIS/",
]


def getPreviousDirectory(repertoire, date):
  """Return the path of the most recent earlier run directory.

  Entries of *repertoire* whose name is an integer are treated as run
  directories named after a timestamp.

  repertoire -- directory that is scanned
  date       -- integer timestamp of the current run; an entry with this
                value is ignored

  Returns repertoire + os.sep + <entry name> for the entry with the largest
  non-zero timestamp different from *date*, or "" when there is none.
  """
  previousDate = 0
  previousName = ""
  for f in os.listdir(repertoire):
    try:
      fdate = int(f)
    except ValueError:
      # Not a timestamp-named entry (e.g. a stray file): ignore it.
      continue
    if fdate != 0 and fdate != date and previousDate < fdate:
      previousDate = fdate
      # Keep the real entry name so the returned path exists even when the
      # name is not the canonical str() of the number (e.g. "0123").
      previousName = f
  if previousDate != 0:
    return repertoire + os.sep + previousName
  return ""

def isDate(text):
  """Tell whether *text* starts with a date written as DD/MM/YYYY.

  Only the beginning of the string is checked (re.match), so trailing
  characters after the date are accepted. Returns True or False.
  """
  found = re.match("[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9]", text)
  return found is not None
def getMonth(text):
  """Return the second "/"-separated field of *text* (the month of DD/MM/YYYY).

  Raises IndexError when *text* contains no "/".
  """
  fields = text.split("/")
  return fields[1]
def getDay(text):
  """Return the first "/"-separated field of *text* (the day of DD/MM/YYYY).

  When *text* contains no "/", the whole string is returned.
  """
  fields = text.split("/")
  return fields[0]

def getFile(postes, filename = ""):
  """Download the resource at URL *postes* and store it in a local file.

  postes   -- URL to fetch
  filename -- path of the file to write; when it is "" a new temporary file
              is created and its path is used instead

  Returns the path of the file that was written.

  Errors raised while opening or reading the URL propagate to the caller.
  The whole body is read before the destination is opened, so a failed
  download leaves no file behind.
  """
  opener1 = urllib2.build_opener()
  pdfpostepage = opener1.open(postes)
  try:
    pdfpostes = pdfpostepage.read()
  finally:
    # Release the connection even when reading fails.
    pdfpostepage.close()

  if filename == "":
    # The original called an undefined mktemp(); mkstemp() creates the file
    # atomically and hands back an open descriptor.
    import tempfile
    handle, filename = tempfile.mkstemp()
    fout = os.fdopen(handle, "wb")
  else:
    fout = open(filename, "wb")
  try:
    fout.write(pdfpostes)
  finally:
    fout.close()
  return filename

def readPostes(filename, rep_destination, idSection, typeDePoste, precedent):
  doc = PDFDocument(filename, 1)

  inSection = False
  predClass = False
  values = []
  datesfin = []

  # load document
  for idpage in range(doc.count_pages()):
    page = doc.read_page(idpage + 1)
    content = page.read_contents()
    for c in content.contents:
      curClass = c.__class__
      if curClass == PopGS and predClass == Path:
	inSection = True
	values.append([])
	datesfin.append("")
      elif inSection:
	if curClass == list:
	  values[-1].append("")
	  for v in c:
	    if v.__class__ == Text:
	      values[-1][-1] += v.text
	      if isDate(v.text):
		datesfin[-1] = v.text
	#else: print "Classe inconnue: ", c
      elif curClass == PushGS:
	  inSection = False
      predClass = c.__class__

  # compute the column that corresponds to the section
  idC = 0
  for i in values[0]:
    if i == "Corps":
      corpsSection = idC
    if i == "Section":
      sectionColumn = idC
    if i == "Section2":
      sectionColumn2 = idC
    if i == "Section3":
      sectionColumn3 = idC
    idC += 1
  if sectionColumn.__class__ != int:
    print "Erreur: pas de colonne section détectée."
    sys.exit(1)

  idCur = 0
  nbNew = 0
  for poste in values[1:]:
    section = 0
    section2 = 0
    section3 = 0
    corps = ""
    try:
      date = datesfin[idCur]
      idCur += 1
      msg = " "
      if int(getMonth(date)) < 3 or (int(getMonth(date)) == 3 and int(getDay(date)) < 15):
	msg = "!!!Attention!!! pour "
      msg += poste[1]
      corps = poste[corpsSection]
      section = int(poste[sectionColumn])
      if sectionColumn2.__class__ == int:
	section2 = int(poste[sectionColumn2])
      else:
	section2 = section
      if sectionColumn3.__class__ == int:
	section3 = int(poste[sectionColumn3])
      else:
	section3 = section
    except:
      pass
    if (section == idSection or section2 == idSection or section3 == idSection) and corps == typeDePoste:
      nomPoste = rep_destination + os.sep + poste[2] + ".pdf"
      precedentNomPoste = precedent + os.sep + poste[2] + ".pdf"
      if precedent == "" or not os.path.exists(precedentNomPoste):
	nbNew += 1
	reussite = False
	for prefixe in liste_urls_dir:
	  url= prefixe + poste[0] + "/FOPC_" + poste[0] + "_" + poste[2] + ".pdf"
	  try:
	    getFile(url, nomPoste)
	    print " Récupération de la fiche du poste ", poste[0], ",", poste[2], ",", date, ",", msg
	    reussite = True
	    break
	  except:
	    pass
	if not reussite:
	  print " !!!Attention!!! Impossible de récupérer le fichier associé au poste", poste[0], ",", poste[2], ",", date, ",", msg
      else:
	shutil.copy(precedentNomPoste, nomPoste)
  print " Nombre de nouveaux:", nbNew


### début du script

# récupération des informations de la ligne de commande
try:
  opts, args = my_getopt(sys.argv[1:], "s:t:r:h", ["section=", "type=", "repertoire=", "help"])
except getopt.GetoptError, msg:
  print "Error: ", msg
  usage()
  sys.exit(2)


repertoire = "postes-publies"
section_voulue = 27
type_poste = "MCF"

for o, a in opts:
  if o in ("-s", "--section"):
    try:
      section_voulue = int(a)
    except:
      print "Attention, la section doit être un entier"
      sys.exit(1)
  if o in ("-t", "--type"):
    type_poste = a
    if not type_poste in ("PR", "MCF"):
      print "Attention, possible que le type de poste soit inconnu:", type_poste
  if o in ("-r", "--repertoire"):
    repertoire = a
  if o in ("-h", "--help"):
    usage()
    sys.exit(0)


if not os.path.exists(repertoire):
  print "Création du répertoire pour les postes"
  os.mkdir(repertoire)
date = int(time())
repertoire_complet = repertoire + os.sep + str(date)

precedent = getPreviousDirectory(repertoire, date)

if os.path.exists(repertoire_complet):
  print "Attention, le répertoire", repertoire_complet, "existe déjà"
  print "Abandon"
  sys.exit(1)
else:
  os.mkdir(repertoire_complet)


print "> Nouveaux postes publiés"

tmp1 = repertoire_complet + os.sep + "postes.pdf"
url1 = "https://www.galaxie.enseignementsup-recherche.gouv.fr/ensup/ListesPostesPublies/Emplois_publies_TrieParCorps.pdf"
getFile(url1, tmp1)
readPostes(tmp1, repertoire_complet, section_voulue, type_poste, precedent)

print "> Nouveaux postes pré-publiés"

tmp2 = repertoire_complet + os.sep + "prepostes.pdf"
url2 = "https://www.galaxie.enseignementsup-recherche.gouv.fr/ensup/ListesPostesPublies/Emplois_prepublies_TrieParCorps.pdf"
getFile(url2, tmp2)
readPostes(tmp2, repertoire_complet, section_voulue, type_poste, precedent)