|
@@ -0,0 +1,168 @@
|
|
|
+#!/bin/python3
|
|
|
+
|
|
|
+# Programme assez bordélique pour parser des tickets de caisse Super U
|
|
|
+# Nécessite une base de donnée SQLite avec la structure suivante :
|
|
|
+
|
|
|
+'''
|
|
|
+CREATE TABLE "achats" (
|
|
|
+ "id" INTEGER,
|
|
|
+ "associated_id" INTEGER,
|
|
|
+ "quantite" INTEGER NOT NULL,
|
|
|
+ "article" TEXT NOT NULL,
|
|
|
+ "categorie" TEXT,
|
|
|
+ "prix" INTEGER NOT NULL,
|
|
|
+ "date" TEXT NOT NULL,
|
|
|
+ "heure" TEXT NOT NULL,
|
|
|
+ "ville" TEXT NOT NULL,
|
|
|
+ PRIMARY KEY("id" AUTOINCREMENT)
|
|
|
+);
|
|
|
+'''
|
|
|
+
|
|
|
+import sys
|
|
|
+import os
|
|
|
+import PyPDF2
|
|
|
+import sqlite3
|
|
|
+
|
|
|
+def parsePDF(f):
|
|
|
+ with open(f, 'rb') as f:
|
|
|
+ reader = PyPDF2.PdfReader(f)
|
|
|
+ contents = reader.getPage(0).extractText().split('\n')
|
|
|
+
|
|
|
+ # Detecte si c'est un ticket de caisse ou ticket client
|
|
|
+ if ("CARTE BANCAIRE" in contents[4]) or ("CARTE BANCAIRE" in contents[1]):
|
|
|
+ print('Pattern found : don\'t look like a good ticket. Next one...\n')
|
|
|
+ return ''
|
|
|
+ else:
|
|
|
+ print('No pattern found : seems to be a good one. Parsing it...')
|
|
|
+ #print(contents)
|
|
|
+ x = 0
|
|
|
+ for i in contents:
|
|
|
+ print("[" + str(x) + "] " + contents[x])
|
|
|
+ x=x+1
|
|
|
+ if "===========" in contents[x]:
|
|
|
+ print("matching with END pattern")
|
|
|
+ return contents[3:x]
|
|
|
+ elif "-----------" in contents[x]:
|
|
|
+ print("matching with END pattern")
|
|
|
+ return contents[3:x]
|
|
|
+ else:
|
|
|
+ pass
|
|
|
+
|
|
|
+def parseArticles(content):
|
|
|
+ if len(content) == 0:
|
|
|
+ return ""
|
|
|
+
|
|
|
+ x = 0
|
|
|
+ for i in content:
|
|
|
+ #print("[" + str(x) + "] " + content[x])
|
|
|
+ x=x+1
|
|
|
+
|
|
|
+ ville = content[2]
|
|
|
+ info = content[12].split()
|
|
|
+ date = content[12].split()[2]
|
|
|
+ heure = content[12].split()[3]
|
|
|
+
|
|
|
+ article = None
|
|
|
+ z=-1
|
|
|
+ for line in content[14:]:
|
|
|
+ z=z+1
|
|
|
+ print(line)
|
|
|
+ if ">>>>" in line:
|
|
|
+ categorie = line.replace('>', '')[2:]
|
|
|
+
|
|
|
+ elif "Pourcentage" in line:
|
|
|
+ continue
|
|
|
+
|
|
|
+ elif not "€" in line:
|
|
|
+ article = line.split(" ")[0][1:]
|
|
|
+ continue
|
|
|
+
|
|
|
+ elif "€" in line:
|
|
|
+ #
|
|
|
+ # Si article pas nul, c'est qu'on est sur un multi ligne
|
|
|
+ #
|
|
|
+ if article != None:
|
|
|
+ print(line.split())
|
|
|
+ if (" x " in line) and ("€/kg" in line) and (" kg " in line):
|
|
|
+ quantite = line.split()[0]
|
|
|
+ prix = line.split()[3] + " €"
|
|
|
+
|
|
|
+ if (" x " in line) and ("€" in line) and (not "€/kg" in line):
|
|
|
+ quantite = line.split()[0]
|
|
|
+ prix = line.split()[2] + " €"
|
|
|
+
|
|
|
+ else:
|
|
|
+ print("AAAAAAAAAA")
|
|
|
+ print(line.split(" "))
|
|
|
+
|
|
|
+ # On à une ligne type poid avec ou sans prix... les relous quoi !
|
|
|
+ # Nom d'article, mais le reste est ligne du dessous
|
|
|
+ # donc on sort de la boucle quand on à notre variable
|
|
|
+ article = line.split(" ")[0][1:]
|
|
|
+ quantite = 1
|
|
|
+ prix = line.split(" ")[len(line.split(" ")) - 2]
|
|
|
+ print("on sort")
|
|
|
+
|
|
|
+
|
|
|
+ # On pousse la requête !
|
|
|
+ if (article != None) and (prix != None) and (quantite != None):
|
|
|
+ print("=> Date / Heure : " + date + " " + heure)
|
|
|
+ print("=> Catégorie : " + categorie)
|
|
|
+ print("=> Article : " + article)
|
|
|
+ print("=> Quantité/poid : " + str(quantite))
|
|
|
+ print("=> Prix : " + prix)
|
|
|
+ print("\n")
|
|
|
+
|
|
|
+ sql = ''' INSERT INTO achats(quantite,article,categorie,prix,date,heure,ville)
|
|
|
+ VALUES(?,?,?,?,?,?,?) '''
|
|
|
+ cur = conn.cursor()
|
|
|
+ cur.execute(sql, (str(quantite), article, categorie, prix, date, heure, ville ))
|
|
|
+ conn.commit()
|
|
|
+
|
|
|
+ article = None
|
|
|
+ prix = None
|
|
|
+ quantite = None
|
|
|
+
|
|
|
+ # Comme on est des cochons et que l'algo est trop mauvais, on peut avoir des erreurs de parsing
|
|
|
+ # on va donc clean les entrées qui ont des valeurs nulles.
|
|
|
+# elif (article != None) and (prix == "") and (quantite != None):
|
|
|
+# continue
|
|
|
+
|
|
|
+def create_connection(db_file):
|
|
|
+ conn = None
|
|
|
+ try:
|
|
|
+ conn = sqlite3.connect(db_file)
|
|
|
+ except Error as e:
|
|
|
+ print(e)
|
|
|
+ return conn
|
|
|
+
|
|
|
+def ajout_article(conn, project):
|
|
|
+ sql = ''' INSERT INTO achats(name,begin_date,end_date)
|
|
|
+ VALUES(?,?,?) '''
|
|
|
+ cur = conn.cursor()
|
|
|
+ cur.execute(sql, project)
|
|
|
+ conn.commit()
|
|
|
+ return cur.lastrowid
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+
|
|
|
+ if len(sys.argv) != 2:
|
|
|
+ print('Erreur')
|
|
|
+ exit(1)
|
|
|
+
|
|
|
+ print(f'Script name is {sys.argv[0]}')
|
|
|
+ print(f'Path with PDF files is {sys.argv[1]}')
|
|
|
+
|
|
|
+ # to store files in a list
|
|
|
+ list = []
|
|
|
+
|
|
|
+ conn = create_connection("database")
|
|
|
+ # dirs=directories
|
|
|
+ for (root, dirs, file) in os.walk(sys.argv[1]):
|
|
|
+ for f in file:
|
|
|
+ if '.pdf' in f:
|
|
|
+ print('=> Play with file : ' + f)
|
|
|
+ parseArticles(parsePDF(sys.argv[1]+"/"+f))
|
|
|
+ conn.close()
|
|
|
+
|
|
|
+ print("terminé")
|