Pierre Bourdin 1 سال پیش
والد
کامیت
e931549192
2 فایلهای تغییر یافته به همراه 67 افزوده شده و 45 حذف شده
  1. BIN
      database
  2. 67 45
      main.py

BIN
database


+ 67 - 45
main.py

@@ -16,45 +16,75 @@ CREATE TABLE "achats" (
 	"ville"	TEXT NOT NULL,
 	PRIMARY KEY("id" AUTOINCREMENT)
 );
-'''
 
-import sys
+tips :
+
+'''
 import os
 import PyPDF2
 import sqlite3
+import argparse
+
+parser = argparse.ArgumentParser(description='This program parse Super U tickets and put datas in a SQLite database.',
+                                 epilog='Enjoy the program! :)')
+
+parser.add_argument('-d', '--db', help='SQLite database file', required=True)
+parser.add_argument('-p', '--path', help='Path to PDF files', required=True)
+parser.add_argument('-v', '--verbose', help='Verbose mode', action='store_true')
+args = parser.parse_args()
+
+if args.verbose:
+    print('Verbose mode activated')
+    print(args)
 
 def parsePDF(f):
     with open(f, 'rb') as f:
+        if args.verbose:
+            print('Opening file : ' + f.name)
+
         reader = PyPDF2.PdfReader(f)
-        contents = reader.getPage(0).extractText().split('\n')
+        contents = reader.pages[0].extract_text().split('\n')
 
         # Detecte si c'est un ticket de caisse ou ticket client
         if ("CARTE BANCAIRE" in contents[4]) or ("CARTE BANCAIRE" in contents[1]):
-            print('Pattern found : don\'t look like a good ticket. Next one...\n')
+            if args.verbose:
+                print('Pattern found : don\'t look like a good ticket. Next one...\n')
             return ''
         else:
-            print('No pattern found : seems to be a good one. Parsing it...')
-            #print(contents)
+            if args.verbose:
+                print('No pattern found : seems to be a good one. Parsing it...')
+                print(contents)
             x = 0
             for i in contents:
-                print("[" + str(x) + "] " + contents[x])
+                if args.verbose:
+                    print("[" + str(x) + "] " + contents[x])
                 x=x+1
                 if "==========="  in contents[x]:
-                    print("matching with END pattern")
+                    if args.verbose:
+                        print("matching with END pattern")
                     return contents[3:x]
                 elif "-----------" in contents[x]:
-                    print("matching with END pattern")
+                    if args.verbose:
+                        print("matching with END pattern")
                     return contents[3:x]
                 else:
                     pass
 
-def parseArticles(content):
+def parseArticles(content, f):
     if len(content) == 0:
         return ""
+    sql = ''' SELECT COUNT(*)  FROM "main"."achats" WHERE "original_file" LIKE '%''' + f + '''%'; '''
+    cur = conn.cursor()
+    cur.execute(sql)
+    conn.commit()
+    if cur.fetchone()[0] != 0:
+        print("Ticket déjà présent dans la base de donnée. Ignoré.")
+        return ""
 
     x = 0
     for i in content:
-        #print("[" + str(x) + "] " + content[x])
+        if args.verbose:
+            print("[" + str(x) + "] " + content[x])
         x=x+1
 
     ville = content[2]
@@ -66,7 +96,8 @@ def parseArticles(content):
     z=-1
     for line in content[14:]:
         z=z+1
-        print(line)
+        if args.verbose:
+            print(line)
         if ">>>>" in line:
             categorie = line.replace('>', '')[2:]
 
@@ -78,11 +109,10 @@ def parseArticles(content):
             continue
 
         elif "€" in line:
-            #
             # Si article pas nul, c'est qu'on est sur un multi ligne
-            #
             if article != None:
-                print(line.split())
+                if args.verbose:
+                    print(line.split())
                 if (" x " in line) and ("€/kg" in line) and (" kg " in line):
                     quantite = line.split()[0]
                     prix = line.split()[3] + " €"
@@ -92,42 +122,38 @@ def parseArticles(content):
                     prix = line.split()[2] + " €"
 
             else:
-                print("AAAAAAAAAA")
-                print(line.split("  "))
-
+                if args.verbose:
+                    print(line.split("  "))
                 # On à une ligne type poid avec ou sans prix... les relous quoi !
                 # Nom d'article, mais le reste est ligne du dessous
                 # donc on sort de la boucle quand on à notre variable
                 article = line.split("  ")[0][1:]
                 quantite = 1
                 prix = line.split("  ")[len(line.split("  ")) - 2]
-                print("on sort")
 
+                if args.verbose:
+                    print("on sort")
 
         # On pousse la requête !
         if (article != None) and (prix != None) and (quantite != None):
-            print("=> Date / Heure : " + date + " " + heure)
-            print("=> Catégorie : " + categorie)
-            print("=> Article : " + article)
-            print("=> Quantité/poid : " + str(quantite))
-            print("=> Prix : " + prix)
-            print("\n")
-
-            sql = ''' INSERT INTO achats(quantite,article,categorie,prix,date,heure,ville)
-                      VALUES(?,?,?,?,?,?,?) '''
+            if args.verbose == True:
+                print("=> Date / Heure : " + date + " " + heure)
+                print("=> Catégorie : " + categorie)
+                print("=> Article : " + article)
+                print("=> Quantité/poid : " + str(quantite))
+                print("=> Prix : " + prix)
+                print("\n")
+
+            sql = ''' INSERT INTO achats(quantite,article,categorie,prix,date,heure,ville,original_file)
+                      VALUES(?,?,?,?,?,?,?,?) '''
             cur = conn.cursor()
-            cur.execute(sql, (str(quantite), article, categorie, prix, date, heure, ville ))
+            cur.execute(sql, (str(quantite), article, categorie, prix, date, heure, ville, f))
             conn.commit()
 
             article = None
             prix = None
             quantite = None
 
-        # Comme on est des cochons et que l'algo est trop mauvais, on peut avoir des erreurs de parsing
-        # on va donc clean les entrées qui ont des valeurs nulles.
-#        elif (article != None) and (prix == "") and (quantite != None):
-#            continue
-
 def create_connection(db_file):
     conn = None
     try:
@@ -145,24 +171,20 @@ def ajout_article(conn, project):
     return cur.lastrowid
 
 if __name__ == '__main__':
-
-    if len(sys.argv) != 2:
-        print('Erreur')
-        exit(1)
-
-    print(f'Script name is {sys.argv[0]}')
-    print(f'Path with PDF files is {sys.argv[1]}')
+    if args.verbose:
+        print('Database filename : ', args.db)
+        print('PDF folder : ', args.path)
 
     # to store files in a list
     list = []
 
-    conn = create_connection("database")
+    conn = create_connection(args.db)
     # dirs=directories
-    for (root, dirs, file) in os.walk(sys.argv[1]):
+    for (root, dirs, file) in os.walk(args.path):
         for f in file:
             if '.pdf' in f:
                 print('=> Play with file : ' + f)
-                parseArticles(parsePDF(sys.argv[1]+"/"+f))
+                parseArticles(parsePDF(args.path+f), f)
     conn.close()
 
-    print("terminé")
+    print("terminé")