|
@@ -16,45 +16,75 @@ CREATE TABLE "achats" (
|
|
|
"ville" TEXT NOT NULL,
|
|
|
PRIMARY KEY("id" AUTOINCREMENT)
|
|
|
);
|
|
|
-'''
|
|
|
|
|
|
-import sys
|
|
|
+tips :
|
|
|
+
|
|
|
+'''
|
|
|
import os
|
|
|
import PyPDF2
|
|
|
import sqlite3
|
|
|
+import argparse
|
|
|
+
|
|
|
# Command-line interface: the database file and the PDF folder are mandatory,
# verbosity is an optional switch. `args` is read as a global by the functions
# defined further down in this file.
parser = argparse.ArgumentParser(
    epilog='Enjoy the program! :)',
    description='This program parse Super U tickets and put datas in a SQLite database.',
)

for _short, _long, _help in (
    ('-d', '--db', 'SQLite database file'),
    ('-p', '--path', 'Path to PDF files'),
):
    parser.add_argument(_short, _long, required=True, help=_help)
parser.add_argument('-v', '--verbose', action='store_true', help='Verbose mode')

args = parser.parse_args()

if args.verbose:
    print('Verbose mode activated')
    print(args)
|
|
|
|
|
|
def parsePDF(f):
    """Return the useful lines of the first page of a receipt PDF.

    The text of page 0 is extracted with PyPDF2 and split on newlines.

    Args:
        f: Path of the PDF file to open.

    Returns:
        ``''`` when "CARTE BANCAIRE" appears on line index 4 or 1 (the page is
        then not treated as a parsable ticket), or when no end marker is
        found. Otherwise the list of lines from index 3 up to, but excluding,
        the first line at index >= 1 that contains a run of ``=`` or ``-``
        characters.

    Progress messages are printed only when the module-level ``args.verbose``
    flag is set.
    """
    verbose = args.verbose

    # Use a distinct name for the handle so the path parameter is not rebound.
    with open(f, 'rb') as pdf_file:
        if verbose:
            print('Opening file : ' + pdf_file.name)

        reader = PyPDF2.PdfReader(pdf_file)
        contents = reader.pages[0].extract_text().split('\n')

    # Detect whether this is a till receipt or a customer (card) slip.
    # Indices are bounds-checked so that very short pages do not raise.
    is_card_slip = any(
        "CARTE BANCAIRE" in contents[index]
        for index in (4, 1)
        if index < len(contents)
    )
    if is_card_slip:
        if verbose:
            print('Pattern found : don\'t look like a good ticket. Next one...\n')
        return ''

    if verbose:
        print('No pattern found : seems to be a good one. Parsing it...')
        print(contents)

    # The end marker is searched from the second line onwards (line 0 is never
    # tested, as in the historical behaviour). Iterating over a range keeps
    # the index inside the list even when no marker exists.
    for index in range(1, len(contents)):
        if verbose:
            print("[" + str(index - 1) + "] " + contents[index - 1])
        line = contents[index]
        if "===========" in line or "-----------" in line:
            if verbose:
                print("matching with END pattern")
            return contents[3:index]

    # No end marker: return the same "nothing to parse" value as for a
    # rejected ticket instead of failing with an IndexError.
    if verbose:
        print('No END pattern found : ticket ignored.')
    return ''
|
|
|
|
|
|
-def parseArticles(content):
|
|
|
+def parseArticles(content, f):
|
|
|
if len(content) == 0:
|
|
|
return ""
|
|
|
+ sql = ''' SELECT COUNT(*) FROM "main"."achats" WHERE "original_file" LIKE '%''' + f + '''%'; '''
|
|
|
+ cur = conn.cursor()
|
|
|
+ cur.execute(sql)
|
|
|
+ conn.commit()
|
|
|
+ if cur.fetchone()[0] != 0:
|
|
|
+ print("Ticket déjà présent dans la base de donnée. Ignoré.")
|
|
|
+ return ""
|
|
|
|
|
|
x = 0
|
|
|
for i in content:
|
|
|
- #print("[" + str(x) + "] " + content[x])
|
|
|
+ if args.verbose:
|
|
|
+ print("[" + str(x) + "] " + content[x])
|
|
|
x=x+1
|
|
|
|
|
|
ville = content[2]
|
|
@@ -66,7 +96,8 @@ def parseArticles(content):
|
|
|
z=-1
|
|
|
for line in content[14:]:
|
|
|
z=z+1
|
|
|
- print(line)
|
|
|
+ if args.verbose:
|
|
|
+ print(line)
|
|
|
if ">>>>" in line:
|
|
|
categorie = line.replace('>', '')[2:]
|
|
|
|
|
@@ -78,11 +109,10 @@ def parseArticles(content):
|
|
|
continue
|
|
|
|
|
|
elif "€" in line:
|
|
|
- #
|
|
|
# Si article pas nul, c'est qu'on est sur un multi ligne
|
|
|
- #
|
|
|
if article != None:
|
|
|
- print(line.split())
|
|
|
+ if args.verbose:
|
|
|
+ print(line.split())
|
|
|
if (" x " in line) and ("€/kg" in line) and (" kg " in line):
|
|
|
quantite = line.split()[0]
|
|
|
prix = line.split()[3] + " €"
|
|
@@ -92,42 +122,38 @@ def parseArticles(content):
|
|
|
prix = line.split()[2] + " €"
|
|
|
|
|
|
else:
|
|
|
- print("AAAAAAAAAA")
|
|
|
- print(line.split(" "))
|
|
|
-
|
|
|
+ if args.verbose:
|
|
|
+ print(line.split(" "))
|
|
|
# On à une ligne type poid avec ou sans prix... les relous quoi !
|
|
|
# Nom d'article, mais le reste est ligne du dessous
|
|
|
# donc on sort de la boucle quand on à notre variable
|
|
|
article = line.split(" ")[0][1:]
|
|
|
quantite = 1
|
|
|
prix = line.split(" ")[len(line.split(" ")) - 2]
|
|
|
- print("on sort")
|
|
|
|
|
|
+ if args.verbose:
|
|
|
+ print("on sort")
|
|
|
|
|
|
# On pousse la requête !
|
|
|
if (article != None) and (prix != None) and (quantite != None):
|
|
|
- print("=> Date / Heure : " + date + " " + heure)
|
|
|
- print("=> Catégorie : " + categorie)
|
|
|
- print("=> Article : " + article)
|
|
|
- print("=> Quantité/poid : " + str(quantite))
|
|
|
- print("=> Prix : " + prix)
|
|
|
- print("\n")
|
|
|
-
|
|
|
- sql = ''' INSERT INTO achats(quantite,article,categorie,prix,date,heure,ville)
|
|
|
- VALUES(?,?,?,?,?,?,?) '''
|
|
|
+ if args.verbose == True:
|
|
|
+ print("=> Date / Heure : " + date + " " + heure)
|
|
|
+ print("=> Catégorie : " + categorie)
|
|
|
+ print("=> Article : " + article)
|
|
|
+ print("=> Quantité/poid : " + str(quantite))
|
|
|
+ print("=> Prix : " + prix)
|
|
|
+ print("\n")
|
|
|
+
|
|
|
+ sql = ''' INSERT INTO achats(quantite,article,categorie,prix,date,heure,ville,original_file)
|
|
|
+ VALUES(?,?,?,?,?,?,?,?) '''
|
|
|
cur = conn.cursor()
|
|
|
- cur.execute(sql, (str(quantite), article, categorie, prix, date, heure, ville ))
|
|
|
+ cur.execute(sql, (str(quantite), article, categorie, prix, date, heure, ville, f))
|
|
|
conn.commit()
|
|
|
|
|
|
article = None
|
|
|
prix = None
|
|
|
quantite = None
|
|
|
|
|
|
- # Comme on est des cochons et que l'algo est trop mauvais, on peut avoir des erreurs de parsing
|
|
|
- # on va donc clean les entrées qui ont des valeurs nulles.
|
|
|
-# elif (article != None) and (prix == "") and (quantite != None):
|
|
|
-# continue
|
|
|
-
|
|
|
def create_connection(db_file):
|
|
|
conn = None
|
|
|
try:
|
|
@@ -145,24 +171,20 @@ def ajout_article(conn, project):
|
|
|
return cur.lastrowid
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: walk the PDF folder and push every ticket found
    # into the SQLite database given on the command line.
    if args.verbose:
        print('Database filename : ', args.db)
        print('PDF folder : ', args.path)

    # `conn` is deliberately kept as a module-level name: parseArticles()
    # uses it as a global.
    conn = create_connection(args.db)
    if conn is None:
        # NOTE(review): create_connection() starts from `conn = None`;
        # presumably that value is returned when the connection fails - confirm.
        raise SystemExit('Unable to open database : ' + args.db)

    try:
        # os.walk() also visits sub-folders, so the full path must be built
        # from `root`, not from the top-level folder; os.path.join() also
        # supplies the separator whether or not args.path ends with one.
        for root, _dirs, files in os.walk(args.path):
            for f in files:
                # Match on the extension only (case-insensitive) so names such
                # as "ticket.pdf.bak" are not handed to the PDF reader.
                if f.lower().endswith('.pdf'):
                    print('=> Play with file : ' + f)
                    parseArticles(parsePDF(os.path.join(root, f)), f)
    finally:
        # Always release the database, even when a ticket fails to parse.
        conn.close()

    print("terminé")
|