def insertDialog(db, documents): cur = db.cursor() deleteQaSql = "DELETE FROM qa" try: cur.execute(deleteQaSql) db.commit() except Exception as e: print("Error %s while execute: %s" % (e, deleteQaSql)) 。。。 try: cur.execute(executeSql) db.commit() except Exception as e: print("Error %s for execute sql: %s" % (e, executeSql)) db.rollback() curId += 1 continue dialogList = mongoDialogs() insertDialog(db, dialogList) db.close()
出错:
File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py", line 286, in <module> main() File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py", line 282, in main insertDialog(db, dialogList) File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py", line 129, in insertDialog for document in documents: File "/Users/crifan/dev/dev_root/xxx/nlp/dialog/data/MongodbToMysql.py", line 69, in mongoDialogs for document in collection.find(): File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py", line 1169, in next if len(self.__data) or self._refresh(): File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py", line 1106, in _refresh self.__send_message(g) File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py", line 975, in __send_message helpers._check_command_response(first) File "/Users/crifan/.virtualenvs/xx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/helpers.py", line 142, in _check_command_response raise CursorNotFound(errmsg, code, response) pymongo.errors.CursorNotFound: Cursor not found, cursor id: 31763154712
python – pymongo.errors.CursorNotFound: cursor id ‘…’ not valid at server – Stack Overflow
超时导致的?
估计是我debug时,断点暂停的时间太长,导致cursor失效?
pymongo.errors.CursorNotFound: Cursor not found – 简书
pymongo.errors.CursorNotFound: Cursor not found, cursor id: 124891571478 – CSDN博客
【pymongo】mongodb cursor id not valid error – 匡子语 – 博客园
重新去运行,不debug了,看看连续执行、确保在10分钟内时,是否还会超时
然后后来由于后续代码运行时间太长,也没法,不方便验证是否正常。
总之知道了是这个问题就好了。
后续如果再遇到,可以去设置连接永远不超时即可。
【后记 20180629】
后来本地连接Mongo调试又出现同样错误:
File "/Users/crifan/dev/dev_root/xxx/processData/mysqlQa/mongoDialogToMysqlQa.py", line 262, in insertDialog for document in dialogGenerator: File "/Users/crifan/dev/dev_root/xxx/processData/mysqlQa/mongoDialogToMysqlQa.py", line 109, in mongoDialogGenerator for eachDialog in dialogCollection.find(): File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py", line 1169, in next if len(self.__data) or self._refresh(): File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py", line 1106, in _refresh self.__send_message(g) File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/cursor.py", line 975, in __send_message helpers._check_command_response(first) File "/Users/crifan/.virtualenvs/xxx-gXiJ4vtz/lib/python3.6/site-packages/pymongo/helpers.py", line 142, in _check_command_response raise CursorNotFound(errmsg, code, response) pymongo.errors.CursorNotFound: cursor id 31095472518 not found
很明显是:
mongo的游标(cursor)超时了。
所以必须是要去设置超时时间(增加,或者永久不超时)
pymongo.errors.CursorNotFound: cursor id not found
python – pymongo.errors.CursorNotFound: cursor id ‘…’ not valid at server – Stack Overflow
此处调试期间,干脆简单点去:
timeout=False
no_cursor_timeout=True
之类的吧
python – pymongo.errors.CursorNotFound: cursor id ‘…’ not found at server – Stack Overflow
mongodb – CursorNotFound error in Pymongo – Stack Overflow
pymongo.errors.CursorNotFound: Cursor not found – 简书
pymongo.errors.CursorNotFound: Cursor not found, cursor id: 124891571478 – CSDN博客
mongodb pymongo.errors.CursorNotFound: Cursor not found, cursor id: 82792803897 – shaomine – 博客园
【pymongo】mongodb cursor id not valid error – 匡子语 – 博客园
改为:
# for eachDialog in dialogCollection.find(): cursor = dialogCollection.find(no_cursor_timeout=True) for eachDialog in cursor: yield eachDialog cursor.close()
看看效果
抽空再完整跑一遍。
最后确定了,是可以的:
运行了半天(4,5个小时)中间没有再出现错误。
如果不行,到时候再去改为:
.batch_size(1)
试试
或者参考:
https://www.cnblogs.com/dplearning/p/6052291.html
试试:
.find(timeout=False)(注:仅适用于PyMongo 2.x;PyMongo 3.0起该参数已被no_cursor_timeout=True取代)
【总结】
此处pymongo报错:
pymongo.errors.CursorNotFound: Cursor not found
原因是:
此处的:
for eachDialog in dialogCollection.find():
find会返回多个数据
而这些数据的处理时间太长,使得游标在两次向服务端取数据(getMore)之间的空闲时间,超过了服务端默认的游标超时时间10分钟,游标被服务端关闭,所以报错了。
解决办法是:
关闭游标的超时限制(给find加上no_cursor_timeout=True),并在用完后手动关闭游标,改为:
# for eachDialog in dialogCollection.find(): cursor = dialogCollection.find(no_cursor_timeout=True) for eachDialog in cursor: yield eachDialog cursor.close()
即可。
【后记20210831】
根据官网最新文档:
cursor.noCursorTimeout() — MongoDB Manual
no_cursor_timeout=True
单独使用已经不够了:游标所属的会话(session)如果空闲超过30分钟,会被服务端标记为过期并关闭,该会话下的游标(即使设置了no_cursor_timeout)也会一并被关闭。
应该改为:
- 先新建一个会话,后续所有操作,都关联上、使用,此会话。
- 每隔一段时间,再去刷新会话
示例代码:
import logging
from datetime import datetime

import pymongo

mongoClient = pymongo.MongoClient('mongodb://127.0.0.1:27017/your_db_name')

# Refresh the session once every 8 minutes.
# NOTE: this must stay below 30 minutes, the default MongoDB session idle
# timeout; once the session expires the server also kills its cursors, even
# those opened with no_cursor_timeout=True. See:
# https://docs.mongodb.com/v5.0/reference/method/cursor.noCursorTimeout/
RefreshSessionPerSeconds = 8 * 60


def mergeHistorResultToNewCollection():
    """Iterate over every document of the old collection inside one session.

    All operations are bound to a single explicit session, and that session
    is refreshed with the ``refreshSessions`` command whenever more than
    ``RefreshSessionPerSeconds`` seconds have passed since the last refresh,
    so a long-running loop does not lose its cursor to session expiry.

    The session is ended and the cursor is closed even if the loop body
    raises, so neither is left open on the server.
    """
    # The context manager ends the session on exit, including on exceptions.
    with mongoClient.start_session() as mongoSession:
        mongoSessionId = mongoSession.session_id
        mongoDb = mongoSession.client["your_db_name"]
        mongoCollectionOld = mongoDb["collecion_old"]
        mongoCollectionNew = mongoDb['collecion_new']

        historyAllResultCursor = mongoCollectionOld.find(
            no_cursor_timeout=True, session=mongoSession)
        try:
            lastUpdateTime = datetime.now()
            for oldHistoryResult in historyAllResultCursor:
                curTime = datetime.now()
                elapsedTimeSeconds = (curTime - lastUpdateTime).total_seconds()
                if elapsedTimeSeconds > RefreshSessionPerSeconds:
                    lastUpdateTime = curTime
                    cmdResp = mongoDb.command(
                        "refreshSessions", [mongoSessionId], session=mongoSession)
                    logging.info("Called refreshSessions command, resp=%s", cmdResp)

                # do what you want
                existedNewResult = mongoCollectionNew.find_one(
                    {"shortLink": "http://xxx"}, session=mongoSession)
        finally:
            # A no_cursor_timeout cursor is never reaped for idleness by the
            # server, so it must be closed explicitly.
            historyAllResultCursor.close()
相关参考文档:
collection – Collection level operations — PyMongo 3.12.0 documentation
database – Database level operations — PyMongo 3.12.0 documentation
refreshSessions — MongoDB Manual
cursor.noCursorTimeout() — MongoDB Manual
详细解释,可参考我回复的帖子:
node.js – MongoDB – Error: getMore command failed: Cursor not found – Stack Overflow
转载请注明:在路上 » 【已解决】python中mongo出错:pymongo.errors.CursorNotFound: Cursor not found